annotate scripts/S05_script_extract_match_v20_blastx.py @ 4:6709645eff5d draft

planemo upload for repository https://github.com/abims-sbr/adaptsearch commit cf1b9c905931ca2ca25faa4844d45c908756472f
author abims-sbr
date Wed, 17 Jan 2018 08:53:53 -0500
parents c8af52875b0f
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
1
c8af52875b0f planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff changeset
1 #!/usr/bin/env python
c8af52875b0f planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff changeset
2 ## AUTHOR: Eric Fontanillas
c8af52875b0f planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff changeset
3 ## LAST VERSION: 14/08/14 by Julie BAFFARD
c8af52875b0f planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff changeset
4
c8af52875b0f planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff changeset
5 ### TBLASTX formatting
c8af52875b0f planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff changeset
6
c8af52875b0f planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff changeset
7 ### MATCH = Only the first match kept
c8af52875b0f planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff changeset
8 MATCH = 0 # Only 1rst match Wanted
c8af52875b0f planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff changeset
9 #MATCH = 1 # All match wanted
c8af52875b0f planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff changeset
10
c8af52875b0f planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff changeset
11 ### SUBMATCH = several parts of the same sequence match the query
c8af52875b0f planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff changeset
12 SUBMATCH = 0 # SUBMATCH NOT WANTED (only the best hit)
c8af52875b0f planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff changeset
13 #SUBMATCH =1 # SUBMATCH WANTED
c8af52875b0f planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff changeset
14
c8af52875b0f planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff changeset
15
c8af52875b0f planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff changeset
16 ### NAME FORMATTING:
c8af52875b0f planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff changeset
17 # [A] FORMAT QUERY NAME 1st STEP [IN DEF1]
c8af52875b0f planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff changeset
18
c8af52875b0f planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff changeset
19 # [B] FORMAT MATCH NAME 1st STEP [IN DEF2.1]
c8af52875b0f planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff changeset
20
c8af52875b0f planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff changeset
21 # [C] FORMAT MATCH NAME 2nd STEP [MIDDLE of DEF 2.3]
c8af52875b0f planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff changeset
22 # [D] FORMAT QUERY NAME 2nd STEP [END of DEF 2.3]
c8af52875b0f planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff changeset
23 # [E] FORMAT MATCH NAME 3rd STEP [END of DEF 2.3]
c8af52875b0f planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff changeset
24
c8af52875b0f planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff changeset
25 ### SPECIFICITY TBLASTX (/BLASTN) formatting:
c8af52875b0f planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff changeset
26 ## 1/ "TBLASTX" formatting => At start of "RUN RUN RUN" change the keyword
c8af52875b0f planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff changeset
27 ## 2/ change line "if keyword in nextline:" in function "split_file"
c8af52875b0f planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff changeset
28 ## 3/ change "Strand" by "Frame" in function "get_information_on_matches"
c8af52875b0f planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff changeset
29
c8af52875b0f planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff changeset
30
c8af52875b0f planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff changeset
31 ############################
c8af52875b0f planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff changeset
32 ### DEF4 : get sequences ###
c8af52875b0f planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff changeset
33 ############################
c8af52875b0f planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff changeset
34 ### [+ get information from the function 2.2.]
c8af52875b0f planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff changeset
35 def get_sequences(query, list2, SUBMATCHEU,WORK_DIR):
c8af52875b0f planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff changeset
36 list_Pairwise = []
c8af52875b0f planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff changeset
37
c8af52875b0f planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff changeset
38 F7 = open("%s/blastRun3.tmp" %WORK_DIR, 'w')
c8af52875b0f planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff changeset
39 F7.write(bash1[query]) # bash1[query] ==> blast output for each query
c8af52875b0f planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff changeset
40 F7.close()
c8af52875b0f planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff changeset
41 F8 = open("%s/blastRun3.tmp" %WORK_DIR, 'r')
c8af52875b0f planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff changeset
42
c8af52875b0f planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff changeset
43 text1 = F8.readlines()
c8af52875b0f planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff changeset
44
c8af52875b0f planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff changeset
45 miniList = []
c8af52875b0f planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff changeset
46 for name in list2: # "list2" contains name of matched sequences (long version! the list1 is the same list but for short version names). It was previously generated by "detect_Matches" function
c8af52875b0f planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff changeset
47
c8af52875b0f planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff changeset
48 l = -1
c8af52875b0f planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff changeset
49 for n in text1:
c8af52875b0f planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff changeset
50 l = l+1
c8af52875b0f planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff changeset
51 if name in n:
c8af52875b0f planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff changeset
52 i = l
c8af52875b0f planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff changeset
53 miniList.append(i) # content positions in the list "text1", of all begining of match (e.g. >gnl|UG|Apo#S51012099 [...])
c8af52875b0f planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff changeset
54
4
6709645eff5d planemo upload for repository https://github.com/abims-sbr/adaptsearch commit cf1b9c905931ca2ca25faa4844d45c908756472f
abims-sbr
parents: 1
diff changeset
55 miniList.reverse()
6709645eff5d planemo upload for repository https://github.com/abims-sbr/adaptsearch commit cf1b9c905931ca2ca25faa4844d45c908756472f
abims-sbr
parents: 1
diff changeset
56
1
c8af52875b0f planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff changeset
57 if miniList != []:
c8af52875b0f planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff changeset
58 length = len(miniList)
4
6709645eff5d planemo upload for repository https://github.com/abims-sbr/adaptsearch commit cf1b9c905931ca2ca25faa4844d45c908756472f
abims-sbr
parents: 1
diff changeset
59
1
c8af52875b0f planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff changeset
60 ii = 0
c8af52875b0f planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff changeset
61 Listing1 = []
c8af52875b0f planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff changeset
62 while ii < length:
c8af52875b0f planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff changeset
63 iii = miniList[ii]
c8af52875b0f planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff changeset
64 entry = text1[iii:]
c8af52875b0f planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff changeset
65 text1 = text1[:iii]
c8af52875b0f planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff changeset
66 Listing1.append(entry) # each "entry" = list of thing beginning by ">"
c8af52875b0f planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff changeset
67 ii = ii+1 # Listing1 is a table of table!!
c8af52875b0f planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff changeset
68
c8af52875b0f planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff changeset
69 Listing1.append(text1) # "text1" = the first lines (begin with "BLASTN 2.2.1 ...]"
c8af52875b0f planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff changeset
70 Listing1.reverse()
c8af52875b0f planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff changeset
71
c8af52875b0f planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff changeset
72 Listing2 = Listing1[1:] # remove the first thing ("BLASTN ...") and keep only table beginning with a line with ">"
c8af52875b0f planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff changeset
73 SEK = len(Listing2)
c8af52875b0f planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff changeset
74 NB_SEK = 0
c8af52875b0f planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff changeset
75
c8af52875b0f planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff changeset
76 for e1 in Listing2: # "Listing2" contents all the entries begining with ">"
c8af52875b0f planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff changeset
77 NB_SEK = NB_SEK + 1
c8af52875b0f planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff changeset
78 list51 = []
c8af52875b0f planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff changeset
79
c8af52875b0f planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff changeset
80 l = -1
c8af52875b0f planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff changeset
81 for line in e1:
c8af52875b0f planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff changeset
82 l = l+1
c8af52875b0f planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff changeset
83 if "Score =" in line:
c8af52875b0f planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff changeset
84 index = l
c8af52875b0f planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff changeset
85 list51.append(l) # index of the lines with score
c8af52875b0f planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff changeset
86
c8af52875b0f planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff changeset
87 list51.reverse()
c8af52875b0f planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff changeset
88 Listing3 = []
c8af52875b0f planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff changeset
89
c8af52875b0f planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff changeset
90 for i5 in list51:
c8af52875b0f planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff changeset
91 e2 = e1[i5:]
c8af52875b0f planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff changeset
92 Listing3.append(e2)
c8af52875b0f planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff changeset
93 e1 = e1[:i5]
c8af52875b0f planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff changeset
94
c8af52875b0f planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff changeset
95 ######################################
c8af52875b0f planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff changeset
96 ### [C] FORMAT MATCH NAME 2nd STEP ###
c8af52875b0f planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff changeset
97
c8af52875b0f planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff changeset
98 BigFastaName = e1 ### LIST OF LINES <=> What is remaining after removing all the hit with "Score =", so all the text comprise between ">" and the first "Score =" ==> Include Match name & "Length & empty lines
c8af52875b0f planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff changeset
99
4
6709645eff5d planemo upload for repository https://github.com/abims-sbr/adaptsearch commit cf1b9c905931ca2ca25faa4844d45c908756472f
abims-sbr
parents: 1
diff changeset
100 SmallFastaName = BigFastaName[0] ## First line <=> MATCH NAME
6709645eff5d planemo upload for repository https://github.com/abims-sbr/adaptsearch commit cf1b9c905931ca2ca25faa4844d45c908756472f
abims-sbr
parents: 1
diff changeset
101
1
c8af52875b0f planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff changeset
102 SmallFastaName = SmallFastaName[1:-1] ### remove ">" and "\n"
c8af52875b0f planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff changeset
103
4
6709645eff5d planemo upload for repository https://github.com/abims-sbr/adaptsearch commit cf1b9c905931ca2ca25faa4844d45c908756472f
abims-sbr
parents: 1
diff changeset
104 """
6709645eff5d planemo upload for repository https://github.com/abims-sbr/adaptsearch commit cf1b9c905931ca2ca25faa4844d45c908756472f
abims-sbr
parents: 1
diff changeset
105 3 lines below : only difference with S08
6709645eff5d planemo upload for repository https://github.com/abims-sbr/adaptsearch commit cf1b9c905931ca2ca25faa4844d45c908756472f
abims-sbr
parents: 1
diff changeset
106 """
1
c8af52875b0f planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff changeset
107 if SmallFastaName[-1] == " ":
4
6709645eff5d planemo upload for repository https://github.com/abims-sbr/adaptsearch commit cf1b9c905931ca2ca25faa4844d45c908756472f
abims-sbr
parents: 1
diff changeset
108 SmallFastaName = SmallFastaName[:-1]
1
c8af52875b0f planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff changeset
109 PutInFastaName1 = SmallFastaName
c8af52875b0f planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff changeset
110
c8af52875b0f planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff changeset
111 ### [C] END FORMAT MATCH NAME 2nd STEP ###
c8af52875b0f planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff changeset
112 ##########################################
c8af52875b0f planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff changeset
113
c8af52875b0f planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff changeset
114 SUBSEK = len(Listing3)
c8af52875b0f planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff changeset
115 NB_SUBSEK = 0
c8af52875b0f planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff changeset
116 list_inBatch = []
c8af52875b0f planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff changeset
117
c8af52875b0f planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff changeset
118 ### IF NO SUBMATCH WANTED !!!! => ONLY KEEP THE FIRST HIT OF "LISTING3":
c8af52875b0f planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff changeset
119 if SUBMATCHEU == 0: # NO SUBMATCH WANTED !!!!
c8af52875b0f planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff changeset
120 Listing4 = []
c8af52875b0f planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff changeset
121 Listing4.append(Listing3[-1]) # Remove this line if submatch wanted!!!
c8af52875b0f planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff changeset
122 elif SUBMATCHEU == 1:
c8af52875b0f planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff changeset
123 Listing4 = Listing3
c8af52875b0f planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff changeset
124
c8af52875b0f planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff changeset
125 for l in Listing4: ## "listing3" contents
c8af52875b0f planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff changeset
126 NB_SUBSEK = NB_SUBSEK+1
c8af52875b0f planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff changeset
127
c8af52875b0f planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff changeset
128 ll1 = string.replace(l[0], " ", "")
c8af52875b0f planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff changeset
129 ll2 = string.replace(l[1], " ", "")
c8af52875b0f planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff changeset
130 ll3 = string.replace(l[2], " ", "")
c8af52875b0f planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff changeset
131 PutInFastaName2 = ll1[:-1] + "||" + ll2[:-1] + "||" + ll3[:-1] # match information
c8af52875b0f planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff changeset
132
c8af52875b0f planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff changeset
133 seq_query = ""
c8af52875b0f planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff changeset
134 pos_query = []
c8af52875b0f planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff changeset
135 seq_match = ""
c8af52875b0f planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff changeset
136 pos_match = []
c8af52875b0f planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff changeset
137
c8af52875b0f planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff changeset
138 for line in l:
c8af52875b0f planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff changeset
139 if "Query:" in line:
c8af52875b0f planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff changeset
140 line = string.replace(line, " ", " ") # remove multiple spaces in line
c8af52875b0f planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff changeset
141 line = string.replace(line, " ", " ")
c8af52875b0f planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff changeset
142 line = string.replace(line, " ", " ")
c8af52875b0f planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff changeset
143
c8af52875b0f planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff changeset
144 lll1 = string.split(line, " ") # split the line, 0: "Query=", 1:start, 2:seq, 3:end
c8af52875b0f planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff changeset
145
c8af52875b0f planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff changeset
146 pos1 = lll1[1]
c8af52875b0f planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff changeset
147 pos1 = string.atoi(pos1)
c8af52875b0f planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff changeset
148 pos_query.append(pos1)
c8af52875b0f planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff changeset
149
c8af52875b0f planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff changeset
150 pos2 = lll1[3][:-1]
c8af52875b0f planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff changeset
151 pos2 = string.atoi(pos2)
c8af52875b0f planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff changeset
152 pos_query.append(pos2)
c8af52875b0f planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff changeset
153
c8af52875b0f planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff changeset
154 seq = lll1[2]
c8af52875b0f planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff changeset
155 seq_query = seq_query + seq
c8af52875b0f planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff changeset
156
c8af52875b0f planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff changeset
157 if "Sbjct:" in line:
c8af52875b0f planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff changeset
158 line = string.replace(line, " ", " ") # remove multiple spaces in line
c8af52875b0f planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff changeset
159 line = string.replace(line, " ", " ")
c8af52875b0f planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff changeset
160 line = string.replace(line, " ", " ")
c8af52875b0f planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff changeset
161
c8af52875b0f planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff changeset
162 lll2 = string.split(line, " ") # split the line, 0: "Query=", 1:start, 2:seq, 3:end
c8af52875b0f planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff changeset
163
c8af52875b0f planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff changeset
164 pos1 = lll2[1]
c8af52875b0f planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff changeset
165 pos1 = string.atoi(pos1)
c8af52875b0f planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff changeset
166 pos_match.append(pos1)
c8af52875b0f planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff changeset
167
c8af52875b0f planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff changeset
168 pos2 = lll2[3][:-1]
c8af52875b0f planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff changeset
169 pos2 = string.atoi(pos2)
c8af52875b0f planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff changeset
170 pos_match.append(pos2)
c8af52875b0f planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff changeset
171
c8af52875b0f planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff changeset
172 seq = lll2[2]
c8af52875b0f planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff changeset
173 seq_match = seq_match + seq
c8af52875b0f planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff changeset
174
c8af52875b0f planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff changeset
175 ## Get the query and matched sequences and the corresponding positions
c8af52875b0f planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff changeset
176 pos_query.sort() # rank small to big
c8af52875b0f planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff changeset
177 pos_query_start = pos_query[0] # get the smaller
c8af52875b0f planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff changeset
178 pos_query_end = pos_query[-1] # get the bigger
c8af52875b0f planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff changeset
179 PutInFastaName3 = "%d...%d" %(pos_query_start, pos_query_end)
c8af52875b0f planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff changeset
180
c8af52875b0f planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff changeset
181 ######################################
c8af52875b0f planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff changeset
182 ### [D] FORMAT QUERY NAME 2nd STEP ###
c8af52875b0f planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff changeset
183
c8af52875b0f planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff changeset
184 FINAL_fasta_Name_Query = ">" + query + "||"+ PutInFastaName3 + "||[[%d/%d]][[%d/%d]]" %(NB_SEK, SEK, NB_SUBSEK,SUBSEK)
c8af52875b0f planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff changeset
185
c8af52875b0f planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff changeset
186 ### [D] END FORMAT QUERY NAME 2nd STEP ###
c8af52875b0f planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff changeset
187 ##########################################
c8af52875b0f planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff changeset
188
c8af52875b0f planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff changeset
189 pos_match.sort()
c8af52875b0f planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff changeset
190 pos_match_start = pos_match[0]
c8af52875b0f planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff changeset
191 pos_match_end = pos_match[-1]
c8af52875b0f planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff changeset
192 PutInFastaName4 = "%d...%d" %(pos_match_start, pos_match_end)
c8af52875b0f planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff changeset
193
c8af52875b0f planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff changeset
194 ######################################
c8af52875b0f planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff changeset
195 ### [E] FORMAT MATCH NAME 3rd STEP ###
c8af52875b0f planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff changeset
196
c8af52875b0f planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff changeset
197 FINAL_fasta_Name_Match = ">" + PutInFastaName1 + "||" + PutInFastaName4 + "||[[%d/%d]][[%d/%d]]" %(NB_SEK, SEK, NB_SUBSEK,SUBSEK)
c8af52875b0f planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff changeset
198
c8af52875b0f planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff changeset
199 ### [E] END FORMAT MATCH NAME 3rd STEP ###
c8af52875b0f planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff changeset
200 ##########################################
c8af52875b0f planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff changeset
201
c8af52875b0f planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff changeset
202 Pairwise = [FINAL_fasta_Name_Query , seq_query , FINAL_fasta_Name_Match , seq_match] # list with 4 members
c8af52875b0f planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff changeset
203 list_Pairwise.append(Pairwise)
c8af52875b0f planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff changeset
204
c8af52875b0f planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff changeset
205 ### Get informations about matches
c8af52875b0f planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff changeset
206 list_info = get_information_on_matches(l) ### DEF3 ###
c8af52875b0f planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff changeset
207
c8af52875b0f planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff changeset
208 F8.close()
c8af52875b0f planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff changeset
209 return(list_Pairwise, list_info)
c8af52875b0f planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff changeset
210 #########################################
c8af52875b0f planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff changeset
211
c8af52875b0f planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff changeset
212
4
6709645eff5d planemo upload for repository https://github.com/abims-sbr/adaptsearch commit cf1b9c905931ca2ca25faa4844d45c908756472f
abims-sbr
parents: 1
diff changeset
213 ###################
6709645eff5d planemo upload for repository https://github.com/abims-sbr/adaptsearch commit cf1b9c905931ca2ca25faa4844d45c908756472f
abims-sbr
parents: 1
diff changeset
214 ### RUN RUN RUN ###
6709645eff5d planemo upload for repository https://github.com/abims-sbr/adaptsearch commit cf1b9c905931ca2ca25faa4844d45c908756472f
abims-sbr
parents: 1
diff changeset
215 ###################
1
c8af52875b0f planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff changeset
216 import string, os, time, re, sys
4
6709645eff5d planemo upload for repository https://github.com/abims-sbr/adaptsearch commit cf1b9c905931ca2ca25faa4844d45c908756472f
abims-sbr
parents: 1
diff changeset
217 from functions import split_file, detect_Matches, get_information_on_matches
1
c8af52875b0f planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff changeset
218
c8af52875b0f planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff changeset
## 1 ## INPUT/OUTPUT
# The script takes one positional argument, used both as the working
# directory name and as the suffix of the input/output file names.
# Exit with a usage message rather than a bare IndexError when it is missing.
if len(sys.argv) < 2:
    sys.exit("Usage: %s <short-name-query_short-name-db>" % sys.argv[0])
SHORT_FILE = sys.argv[1] ## short-name-query_short-name-db

# File-name prefixes: 04 and 06 for S05, 11 and 13 for S08
path_in = "%s/04_outputBlast_%s.txt" %(SHORT_FILE, SHORT_FILE)
path_out = "%s/06_PairwiseMatch_%s.fasta" %(SHORT_FILE, SHORT_FILE)

## 2 ## RUN
# Queries with / without a hit. These lists are filled for inspection only;
# nothing in the visible part of this script reads them afterwards.
list_hits = []
list_no_hits = []

# The 'with' block guarantees the output file is flushed and closed even if
# one of the parsing helpers raises while the BLAST output is processed.
with open(path_out, "w") as file_out:
    ## create Bash1 ##
    # bash1 is keyed by query name (see the loop below); its values are
    # whatever split_file associates with each query.
    bash1 = split_file(path_in, "TBLASTX") ### DEF1 ###

    ## detect and save match ##
    for query in bash1.keys():

        ## 2.1. detect matches ##
        list_match, list_match2, hit = detect_Matches(query, MATCH, SHORT_FILE, bash1) ### DEF2 ###

        if hit == 0: # no match for that sequence
            list_no_hits.append(query)
            continue
        if hit != 1: # any other flag value: nothing to record, as before
            continue

        # hit == 1: match(es)
        list_hits.append(query)

        ## 2.2. get sequences ##
        # NOTE(review): SUBMATCH is expected to be a module-level setting
        # defined next to MATCH at the top of the file -- confirm.
        list_pairwiseMatch, list_info = get_sequences(query, list_match2, SUBMATCH, SHORT_FILE)

        # divergence
        divergence = list_info[6]
        # gap number
        gap_number = list_info[7]
        # real divergence (divergence without accounting INDELs)
        real_divergence = list_info[8]
        # length matched
        length_matched = list_info[10]

        # The same statistics are appended to both the query and the match header.
        stats_suffix = "||%s||%s||%s||%s" %(divergence, gap_number, real_divergence, length_matched)

        ### WRITE PAIRWISE ALIGNMENT IN OUTPUT FILES
        # Each entry is [query_name, query_seq, match_name, match_seq];
        # no length filtering is applied before writing.
        for query_name, query_seq, match_name, match_seq in list_pairwiseMatch:
            file_out.write("%s%s\n%s\n" %(query_name, stats_suffix, query_seq))
            file_out.write("%s%s\n%s\n" %(match_name, stats_suffix, match_seq))