Mercurial > repos > abims-sbr > pairwise
annotate scripts/S08_script_extract_match_v20_blastx.py @ 8:471ed956ff13 draft
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit b7a3030ea134b5dfad89b1a869db659d72d1145c
| author | abims-sbr |
|---|---|
| date | Wed, 28 Feb 2018 10:37:14 -0500 |
| parents | 6709645eff5d |
| children |
| rev | line source |
|---|---|
|
1
c8af52875b0f
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
1 #!/usr/bin/env python |
|
c8af52875b0f
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
2 ## AUTHOR: Eric Fontanillas |
|
c8af52875b0f
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
3 ## LAST VERSION: 14/08/14 by Julie BAFFARD |
|
c8af52875b0f
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
4 |
|
c8af52875b0f
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
5 ### TBLASTX formatting |
|
c8af52875b0f
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
6 |
|
c8af52875b0f
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
7 ### MATCH = Only the first match kept |
|
c8af52875b0f
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
8 MATCH = 0 # Only 1rst match Wanted |
|
4
6709645eff5d
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit cf1b9c905931ca2ca25faa4844d45c908756472f
abims-sbr
parents:
1
diff
changeset
|
9 #MATCH = 1 # All match wanted |
|
6709645eff5d
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit cf1b9c905931ca2ca25faa4844d45c908756472f
abims-sbr
parents:
1
diff
changeset
|
10 |
|
1
c8af52875b0f
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
11 ### SUBMATCH = several parts of the same sequence match the query |
|
c8af52875b0f
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
12 SUBMATCH = 0 # SUBMATCH NOT WANTED (ONLY 1rst HIT) |
|
c8af52875b0f
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
13 #SUBMATCH =1 # SUBMATCH WANTED |
|
c8af52875b0f
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
14 |
|
c8af52875b0f
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
15 |
|
c8af52875b0f
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
16 ### NAME FORMATTING: |
|
c8af52875b0f
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
17 # [A] FORMAT QUERY NAME 1st STEP [IN DEF1] |
|
c8af52875b0f
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
18 |
|
c8af52875b0f
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
19 # [B] FORMAT MATCH NAME 1st STEP [IN DEF2.1] |
|
c8af52875b0f
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
20 |
|
c8af52875b0f
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
21 # [C] FORMAT MATCH NAME 2nd STEP [MIDDLE of DEF 2.3] |
|
c8af52875b0f
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
22 # [D] FORMAT QUERY NAME 2nd STEP [END of DEF 2.3] |
|
c8af52875b0f
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
23 # [E] FORMAT MATCH NAME 3rd STEP [END of DEF 2.3] |
|
c8af52875b0f
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
24 |
|
c8af52875b0f
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
25 ### SPECIFICITY TBLASTX (/BLASTN) formatting: |
|
c8af52875b0f
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
26 ## 1/ "TBLASTX" formatting => At start of "RUN RUN RUN" change the keyword |
|
c8af52875b0f
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
27 ## 2/ change line "if keyword in nextline:" in function "split_file" |
|
c8af52875b0f
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
28 ## 3/ change "Strand" by "Frame" in function "get_information_on_matches" |
|
c8af52875b0f
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
29 |
|
c8af52875b0f
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
30 |
|
c8af52875b0f
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
31 ############################ |
|
c8af52875b0f
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
32 ### DEF4 : get sequences ### |
|
c8af52875b0f
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
33 ############################ |
|
c8af52875b0f
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
34 ### [+ get information from the DEF3.] |
|
c8af52875b0f
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
35 def get_sequences(query, list2, SUBMATCHEU, WORK_DIR): |
|
c8af52875b0f
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
36 list_Pairwise = [] |
|
c8af52875b0f
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
37 |
|
c8af52875b0f
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
38 F7 = open("%s/blastRun3.tmp" %WORK_DIR, 'w') |
|
c8af52875b0f
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
39 F7.write(bash1[query]) # bash1[query] ==> blast output for each query |
|
c8af52875b0f
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
40 F7.close() |
|
c8af52875b0f
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
41 F8 = open("%s/blastRun3.tmp" %WORK_DIR, 'r') |
|
c8af52875b0f
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
42 |
|
c8af52875b0f
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
43 text1 = F8.readlines() |
|
c8af52875b0f
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
44 |
|
c8af52875b0f
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
45 miniList = [] |
|
c8af52875b0f
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
46 for name in list2: # "list2" contains name of matched sequences (long version! the list1 is the same list but for short version names). It was previously generated by "detect_Matches" function |
|
c8af52875b0f
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
47 |
|
c8af52875b0f
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
48 l = -1 |
|
c8af52875b0f
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
49 for n in text1: |
|
c8af52875b0f
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
50 l = l+1 |
|
c8af52875b0f
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
51 if name in n: |
|
c8af52875b0f
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
52 i = l |
|
c8af52875b0f
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
53 miniList.append(i) # content positions in the list "text1", of all begining of match (e.g. >gnl|UG|Apo#S51012099 [...]) |
|
c8af52875b0f
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
54 |
|
c8af52875b0f
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
55 miniList.reverse() |
|
c8af52875b0f
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
56 |
|
c8af52875b0f
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
57 if miniList != []: |
|
c8af52875b0f
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
58 length = len(miniList) |
|
c8af52875b0f
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
59 |
|
c8af52875b0f
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
60 ii = 0 |
|
c8af52875b0f
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
61 Listing1 = [] |
|
c8af52875b0f
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
62 while ii < length: |
|
c8af52875b0f
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
63 iii = miniList[ii] |
|
c8af52875b0f
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
64 entry = text1[iii:] |
|
c8af52875b0f
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
65 text1 = text1[:iii] |
|
c8af52875b0f
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
66 Listing1.append(entry) # each "entry" = list of thing beginning by ">" |
|
c8af52875b0f
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
67 ii = ii+1 # Listing1 is a table of table!! |
|
c8af52875b0f
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
68 |
|
c8af52875b0f
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
69 Listing1.append(text1) # "text1" = the first lines (begin with "BLASTN 2.2.1 ...]" |
|
c8af52875b0f
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
70 Listing1.reverse() |
|
c8af52875b0f
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
71 |
|
c8af52875b0f
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
72 Listing2 = Listing1[1:] # remove the first thing ("BLASTN ...") and keep only table beginning with a line with ">" |
|
c8af52875b0f
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
73 SEK = len(Listing2) |
|
c8af52875b0f
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
74 NB_SEK = 0 |
|
c8af52875b0f
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
75 |
|
c8af52875b0f
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
76 for e1 in Listing2: # "Listing2" contents all the entries begining with ">" |
|
c8af52875b0f
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
77 NB_SEK = NB_SEK + 1 |
|
c8af52875b0f
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
78 list51 = [] |
|
c8af52875b0f
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
79 |
|
c8af52875b0f
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
80 l = -1 |
|
c8af52875b0f
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
81 for line in e1: |
|
c8af52875b0f
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
82 l = l+1 |
|
c8af52875b0f
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
83 if "Score =" in line: |
|
c8af52875b0f
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
84 index = l |
|
c8af52875b0f
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
85 list51.append(l) # index of the lines with score |
|
c8af52875b0f
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
86 |
|
4
6709645eff5d
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit cf1b9c905931ca2ca25faa4844d45c908756472f
abims-sbr
parents:
1
diff
changeset
|
87 list51.reverse() |
|
1
c8af52875b0f
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
88 Listing3 = [] |
|
c8af52875b0f
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
89 |
|
c8af52875b0f
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
90 for i5 in list51: |
|
c8af52875b0f
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
91 e2 = e1[i5:] |
|
c8af52875b0f
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
92 Listing3.append(e2) |
|
c8af52875b0f
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
93 e1 = e1[:i5] |
|
c8af52875b0f
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
94 |
|
c8af52875b0f
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
95 |
|
c8af52875b0f
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
96 ###################################### |
|
c8af52875b0f
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
97 ### [C] FORMAT MATCH NAME 2nd STEP ### |
|
c8af52875b0f
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
98 |
|
c8af52875b0f
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
99 BigFastaName = e1 ### LIST OF LINES <=> What is remaining after removing all the hit with "Score =", so all the text comprise between ">" and the first "Score =" ==> Include Match name & "Length & empty lines |
|
c8af52875b0f
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
100 |
|
c8af52875b0f
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
101 SmallFastaName = BigFastaName[0] ## First line <=> MATCH NAME |
|
c8af52875b0f
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
102 |
|
c8af52875b0f
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
103 SmallFastaName = SmallFastaName[1:-2] ### remove ">" and "\n" |
|
c8af52875b0f
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
104 |
|
4
6709645eff5d
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit cf1b9c905931ca2ca25faa4844d45c908756472f
abims-sbr
parents:
1
diff
changeset
|
105 """ |
|
6709645eff5d
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit cf1b9c905931ca2ca25faa4844d45c908756472f
abims-sbr
parents:
1
diff
changeset
|
106 3 lines below : only difference with S05 |
|
6709645eff5d
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit cf1b9c905931ca2ca25faa4844d45c908756472f
abims-sbr
parents:
1
diff
changeset
|
107 """ |
|
1
c8af52875b0f
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
108 S1 = string.split(SmallFastaName, "||") |
|
4
6709645eff5d
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit cf1b9c905931ca2ca25faa4844d45c908756472f
abims-sbr
parents:
1
diff
changeset
|
109 S2 = string.split(S1[0], " ") |
|
1
c8af52875b0f
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
110 PutInFastaName1 = S2[0] |
|
c8af52875b0f
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
111 |
|
c8af52875b0f
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
112 ### [C] END FORMAT MATCH NAME 2nd STEP ### |
|
c8af52875b0f
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
113 ########################################## |
|
c8af52875b0f
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
114 |
|
c8af52875b0f
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
115 SUBSEK = len(Listing3) |
|
c8af52875b0f
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
116 NB_SUBSEK = 0 |
|
c8af52875b0f
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
117 list_inBatch = [] |
|
4
6709645eff5d
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit cf1b9c905931ca2ca25faa4844d45c908756472f
abims-sbr
parents:
1
diff
changeset
|
118 |
|
1
c8af52875b0f
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
119 ### IF NO SUBMATCH WANTED !!!! => ONLY KEEP THE FIRST HIT OF "LISTING3": |
|
c8af52875b0f
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
120 if SUBMATCHEU == 0: # NO SUBMATCH WANTED !!!! |
|
c8af52875b0f
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
121 Listing4 = [] |
|
c8af52875b0f
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
122 Listing4.append(Listing3[-1]) # Remove this line if submatch wanted!!! |
|
c8af52875b0f
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
123 elif SUBMATCHEU == 1: |
|
c8af52875b0f
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
124 Listing4 = Listing3 |
|
c8af52875b0f
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
125 |
|
4
6709645eff5d
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit cf1b9c905931ca2ca25faa4844d45c908756472f
abims-sbr
parents:
1
diff
changeset
|
126 for l in Listing4: ## "listing3" contents |
|
1
c8af52875b0f
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
127 NB_SUBSEK = NB_SUBSEK+1 |
|
c8af52875b0f
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
128 |
|
c8af52875b0f
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
129 ll1 = string.replace(l[0], " ", "") |
|
c8af52875b0f
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
130 ll2 = string.replace(l[1], " ", "") |
|
c8af52875b0f
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
131 ll3 = string.replace(l[2], " ", "") |
|
c8af52875b0f
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
132 PutInFastaName2 = ll1[:-1] + "||" + ll2[:-1] + "||" + ll3[:-1] # match information |
|
c8af52875b0f
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
133 |
|
c8af52875b0f
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
134 seq_query = "" |
|
c8af52875b0f
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
135 pos_query = [] |
|
c8af52875b0f
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
136 seq_match = "" |
|
c8af52875b0f
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
137 pos_match = [] |
|
4
6709645eff5d
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit cf1b9c905931ca2ca25faa4844d45c908756472f
abims-sbr
parents:
1
diff
changeset
|
138 |
|
1
c8af52875b0f
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
139 for line in l: |
|
c8af52875b0f
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
140 if "Query:" in line: |
|
c8af52875b0f
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
141 line = string.replace(line, " ", " ") # remove multiple spaces in line |
|
c8af52875b0f
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
142 line = string.replace(line, " ", " ") |
|
c8af52875b0f
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
143 line = string.replace(line, " ", " ") |
|
c8af52875b0f
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
144 |
|
c8af52875b0f
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
145 lll1 = string.split(line, " ") # split the line, 0: "Query=", 1:start, 2:seq, 3:end |
|
c8af52875b0f
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
146 |
|
c8af52875b0f
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
147 pos1 = lll1[1] |
|
c8af52875b0f
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
148 pos1 = string.atoi(pos1) |
|
c8af52875b0f
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
149 pos_query.append(pos1) |
|
c8af52875b0f
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
150 |
|
c8af52875b0f
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
151 pos2 = lll1[3][:-1] |
|
c8af52875b0f
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
152 pos2 = string.atoi(pos2) |
|
c8af52875b0f
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
153 pos_query.append(pos2) |
|
c8af52875b0f
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
154 |
|
c8af52875b0f
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
155 seq = lll1[2] |
|
4
6709645eff5d
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit cf1b9c905931ca2ca25faa4844d45c908756472f
abims-sbr
parents:
1
diff
changeset
|
156 seq_query = seq_query + seq |
|
1
c8af52875b0f
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
157 |
|
c8af52875b0f
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
158 if "Sbjct:" in line: |
|
c8af52875b0f
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
159 line = string.replace(line, " ", " ") # remove multiple spaces in line |
|
c8af52875b0f
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
160 line = string.replace(line, " ", " ") |
|
c8af52875b0f
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
161 line = string.replace(line, " ", " ") |
|
c8af52875b0f
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
162 |
|
c8af52875b0f
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
163 lll2 = string.split(line, " ") # split the line, 0: "Sbjct:", 1:start, 2:seq, 3:end |
|
c8af52875b0f
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
164 |
|
c8af52875b0f
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
165 pos1 = lll2[1] |
|
c8af52875b0f
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
166 pos1 = string.atoi(pos1) |
|
c8af52875b0f
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
167 pos_match.append(pos1) |
|
c8af52875b0f
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
168 |
|
c8af52875b0f
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
169 pos2 = lll2[3][:-1] |
|
c8af52875b0f
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
170 pos2 = string.atoi(pos2) |
|
c8af52875b0f
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
171 pos_match.append(pos2) |
|
c8af52875b0f
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
172 |
|
c8af52875b0f
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
173 seq = lll2[2] |
|
c8af52875b0f
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
174 seq_match = seq_match + seq |
|
c8af52875b0f
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
175 |
|
4
6709645eff5d
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit cf1b9c905931ca2ca25faa4844d45c908756472f
abims-sbr
parents:
1
diff
changeset
|
176 ## Get the query and matched sequences and the corresponding positions |
|
1
c8af52875b0f
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
177 pos_query.sort() # sort in ascending order |
|
c8af52875b0f
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
178 pos_query_start = pos_query[0] # get the smallest position |
|
c8af52875b0f
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
179 pos_query_end = pos_query[-1] # get the largest position |
|
c8af52875b0f
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
180 PutInFastaName3 = "%d...%d" %(pos_query_start, pos_query_end) |
|
c8af52875b0f
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
181 |
|
c8af52875b0f
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
182 ###################################### |
|
c8af52875b0f
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
183 ### [D] FORMAT QUERY NAME 2nd STEP ### |
|
c8af52875b0f
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
184 |
|
c8af52875b0f
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
185 FINAL_fasta_Name_Query = ">" + query + "||"+ PutInFastaName3 + "||[[%d/%d]][[%d/%d]]" %(NB_SEK, SEK, NB_SUBSEK,SUBSEK) |
|
c8af52875b0f
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
186 |
|
c8af52875b0f
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
187 ### [D] END FORMAT QUERY NAME 2nd STEP ### |
|
4
6709645eff5d
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit cf1b9c905931ca2ca25faa4844d45c908756472f
abims-sbr
parents:
1
diff
changeset
|
188 ########################################## |
|
1
c8af52875b0f
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
189 |
|
c8af52875b0f
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
190 pos_match.sort() |
|
c8af52875b0f
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
191 pos_match_start = pos_match[0] |
|
c8af52875b0f
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
192 pos_match_end = pos_match[-1] |
|
c8af52875b0f
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
193 PutInFastaName4 = "%d...%d" %(pos_match_start, pos_match_end) |
|
c8af52875b0f
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
194 |
|
c8af52875b0f
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
195 ###################################### |
|
c8af52875b0f
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
196 ### [E] FORMAT MATCH NAME 3rd STEP ### |
|
c8af52875b0f
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
197 |
|
c8af52875b0f
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
198 FINAL_fasta_Name_Match = ">" + PutInFastaName1 + "||" + PutInFastaName4 + "||[[%d/%d]][[%d/%d]]" %(NB_SEK, SEK, NB_SUBSEK,SUBSEK) |
|
c8af52875b0f
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
199 |
|
c8af52875b0f
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
200 ### [E] END FORMAT MATCH NAME 3rd STEP ### |
|
c8af52875b0f
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
201 ########################################## |
|
c8af52875b0f
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
202 |
|
c8af52875b0f
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
203 Pairwise = [FINAL_fasta_Name_Query , seq_query , FINAL_fasta_Name_Match , seq_match] # list with 4 members |
|
c8af52875b0f
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
204 list_Pairwise.append(Pairwise) |
|
c8af52875b0f
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
205 |
|
c8af52875b0f
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
206 ### Get information about matches |
|
4
6709645eff5d
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit cf1b9c905931ca2ca25faa4844d45c908756472f
abims-sbr
parents:
1
diff
changeset
|
207 list_info = get_information_on_matches(l) ### DEF3 ### |
|
1
c8af52875b0f
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
208 |
|
c8af52875b0f
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
209 F8.close() |
|
c8af52875b0f
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
210 return(list_Pairwise, list_info) |
|
c8af52875b0f
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
211 ######################################### |
|
c8af52875b0f
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
212 |
|
c8af52875b0f
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
213 |
|
c8af52875b0f
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
214 ################### |
|
c8af52875b0f
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
215 ### RUN RUN RUN ### |
|
c8af52875b0f
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
216 ################### |
|
c8af52875b0f
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
217 import string, os, time, re, sys |
|
4
6709645eff5d
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit cf1b9c905931ca2ca25faa4844d45c908756472f
abims-sbr
parents:
1
diff
changeset
|
218 from functions import split_file, detect_Matches, get_information_on_matches |
|
1
c8af52875b0f
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
219 |
|
c8af52875b0f
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
220 ## 1 ## INPUT/OUTPUT |
|
c8af52875b0f
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
221 SHORT_FILE = sys.argv[1] ## short-name-query_short-name-db |
|
c8af52875b0f
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
222 |
|
4
6709645eff5d
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit cf1b9c905931ca2ca25faa4844d45c908756472f
abims-sbr
parents:
1
diff
changeset
|
223 """ |
|
6709645eff5d
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit cf1b9c905931ca2ca25faa4844d45c908756472f
abims-sbr
parents:
1
diff
changeset
|
224 04 and 06 for S05, 11 and 13 for S08 |
|
6709645eff5d
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit cf1b9c905931ca2ca25faa4844d45c908756472f
abims-sbr
parents:
1
diff
changeset
|
225 """ |
|
1
c8af52875b0f
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
226 path_in = "%s/11_outputBlast_%s.txt" %(SHORT_FILE, SHORT_FILE) |
|
c8af52875b0f
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
227 file_out = open("%s/13_PairwiseMatch_%s.fasta" %(SHORT_FILE, SHORT_FILE),"w") |
|
c8af52875b0f
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
228 |
|
c8af52875b0f
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
229 ## 2 ## RUN |
|
c8af52875b0f
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
230 ## create Bash1 ## |
|
4
6709645eff5d
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit cf1b9c905931ca2ca25faa4844d45c908756472f
abims-sbr
parents:
1
diff
changeset
|
231 bash1 = split_file(path_in, "TBLASTX") ### DEF1 ### |
|
1
c8af52875b0f
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
232 |
|
c8af52875b0f
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
233 ## detect and save match ## |
|
c8af52875b0f
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
234 list_hits =[] |
|
c8af52875b0f
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
235 list_no_hits = [] |
|
c8af52875b0f
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
236 j= 0 |
|
c8af52875b0f
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
237 k = 0 |
|
c8af52875b0f
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
238 lene = len(bash1.keys()) |
|
c8af52875b0f
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
239 for query in bash1.keys(): |
|
c8af52875b0f
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
240 j = j+1 |
|
c8af52875b0f
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
241 |
|
c8af52875b0f
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
242 ## 2.1. detect matches ## |
|
4
6709645eff5d
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit cf1b9c905931ca2ca25faa4844d45c908756472f
abims-sbr
parents:
1
diff
changeset
|
243 list_match, list_match2, hit=detect_Matches(query, MATCH, SHORT_FILE, bash1) ### DEF2 ### |
|
1
c8af52875b0f
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
244 |
|
c8af52875b0f
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
245 if hit == 1: # match(es) |
|
c8af52875b0f
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
246 list_hits.append(query) |
|
c8af52875b0f
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
247 if hit == 0: # no match for that sequence |
|
c8af52875b0f
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
248 list_no_hits.append(query) |
|
c8af52875b0f
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
249 |
|
c8af52875b0f
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
250 ## 2.2. get sequences ## |
|
c8af52875b0f
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
251 if hit ==1: |
|
4
6709645eff5d
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit cf1b9c905931ca2ca25faa4844d45c908756472f
abims-sbr
parents:
1
diff
changeset
|
252 list_pairwiseMatch, list_info = get_sequences(query, list_match2, SUBMATCH, SHORT_FILE) |
|
1
c8af52875b0f
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
253 |
|
c8af52875b0f
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
254 # divergence |
|
c8af52875b0f
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
255 divergence = list_info[6] |
|
c8af52875b0f
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
256 # gap number |
|
c8af52875b0f
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
257 gap_number = list_info[7] |
|
c8af52875b0f
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
258 # real divergence (divergence without accounting for INDELs) |
|
c8af52875b0f
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
259 real_divergence = list_info[8] |
|
c8af52875b0f
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
260 # length matched |
|
c8af52875b0f
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
261 length_matched = list_info[10] |
|
c8af52875b0f
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
262 |
|
c8af52875b0f
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
263 ### WRITE PAIRWISE ALIGNMENT IN OUTPUT FILES |
|
c8af52875b0f
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
264 for pairwise in list_pairwiseMatch: |
|
c8af52875b0f
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
265 k = k+1 |
|
c8af52875b0f
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
266 |
|
c8af52875b0f
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
267 query_name = pairwise[0] |
|
c8af52875b0f
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
268 query_seq = pairwise[1] |
|
c8af52875b0f
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
269 match_name = pairwise[2] |
|
c8af52875b0f
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
270 match_seq = pairwise[3] |
|
c8af52875b0f
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
271 |
|
c8af52875b0f
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
272 len_query_seq = len(query_seq) |
|
c8af52875b0f
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
273 |
|
c8af52875b0f
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
274 # If NO CONTROL FOR LENGTH, USE THE FOLLOWING LINES INSTEAD: |
|
c8af52875b0f
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
275 |
|
c8af52875b0f
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
276 file_out.write("%s||%s||%s||%s||%s" %(query_name,divergence,gap_number,real_divergence,length_matched)) |
|
c8af52875b0f
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
277 file_out.write("\n") |
|
c8af52875b0f
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
278 file_out.write("%s" %query_seq) |
|
c8af52875b0f
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
279 file_out.write("\n") |
|
c8af52875b0f
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
280 |
|
c8af52875b0f
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
281 file_out.write("%s||%s||%s||%s||%s" %(match_name,divergence,gap_number,real_divergence,length_matched)) |
|
c8af52875b0f
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
282 file_out.write("\n") |
|
c8af52875b0f
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
283 file_out.write("%s" %match_seq) |
|
c8af52875b0f
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
284 file_out.write("\n") |
|
c8af52875b0f
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
285 |
|
c8af52875b0f
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
286 |
|
c8af52875b0f
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
287 file_out.close() |
