Mercurial > repos > abims-sbr > pairwise
comparison scripts/S08_script_extract_match_v20_blastx.py @ 4:6709645eff5d draft
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit cf1b9c905931ca2ca25faa4844d45c908756472f
| author | abims-sbr |
|---|---|
| date | Wed, 17 Jan 2018 08:53:53 -0500 |
| parents | c8af52875b0f |
| children |
comparison
equal
deleted
inserted
replaced
| 3:5f68b2fc02c1 | 4:6709645eff5d |
|---|---|
| 4 | 4 |
| 5 ### TBLASTX formatting | 5 ### TBLASTX formatting |
| 6 | 6 |
| 7 ### MATCH = Only the first match keeped | 7 ### MATCH = Only the first match keeped |
| 8 MATCH = 0 # Only 1rst match Wanted | 8 MATCH = 0 # Only 1rst match Wanted |
| 9 #MATCH = 1 # All match want | 9 #MATCH = 1 # All match wanted |
| 10 | |
| 10 ### SUBMATCH = several part of a same sequence match with the query | 11 ### SUBMATCH = several part of a same sequence match with the query |
| 11 SUBMATCH = 0 # SUBMATCH NOT WANTED (ONLY 1rst HIT) | 12 SUBMATCH = 0 # SUBMATCH NOT WANTED (ONLY 1rst HIT) |
| 12 #SUBMATCH =1 # SUBMATCH WANTED | 13 #SUBMATCH =1 # SUBMATCH WANTED |
| 13 | 14 |
| 14 | 15 |
| 23 | 24 |
| 24 ### SPECIFICITY TBLASTX (/BLASTN) formatting: | 25 ### SPECIFICITY TBLASTX (/BLASTN) formatting: |
| 25 ## 1/ "TBLASTX" formatting => At start of "RUN RUN RUN" change the keyword | 26 ## 1/ "TBLASTX" formatting => At start of "RUN RUN RUN" change the keyword |
| 26 ## 2/ change line "if keyword in nextline:" in function "split_file" | 27 ## 2/ change line "if keyword in nextline:" in function "split_file" |
| 27 ## 3/ change "Strand" by "Frame" in function "get_information_on_matches" | 28 ## 3/ change "Strand" by "Frame" in function "get_information_on_matches" |
| 28 | |
| 29 | |
| 30 | |
| 31 ######################################## | |
| 32 ### DEF 1. Split each "BLASTN" event ### | |
| 33 ######################################## | |
| 34 def split_file(path_in, keyword): | |
| 35 | |
| 36 file_in = open(path_in, "r") | |
| 37 | |
| 38 RUN = '' | |
| 39 BASH1={} | |
| 40 | |
| 41 while 1: | |
| 42 nextline = file_in.readline() | |
| 43 | |
| 44 ################################## | |
| 45 ### [A] FORMATTING QUERY NAME ### | |
| 46 | |
| 47 # Get query name | |
| 48 if nextline[0:6]=='Query=': | |
| 49 L1 = string.split(nextline, "||") | |
| 50 L2 = string.split(L1[0], " ") | |
| 51 query = L2[1] | |
| 52 if query[-1] == "\n": | |
| 53 query = query[:-1] | |
| 54 | |
| 55 ### [A] END FORMATTING QUERY NAME ### | |
| 56 ###################################### | |
| 57 | |
| 58 | |
| 59 ### split the file with keyword ### | |
| 60 if keyword in nextline: | |
| 61 # Two cases here: | |
| 62 #1# If it is the first "RUN" in the block (i.e. the first occurence of "BLASTN" in the file), we have just to add the new lines in the "RUN" list ... 2nd , we have also to detect the 'key' of bash1, which is the "query" name ... and third we will have to save this "RUN" in the bash1, once we will have detected a new "RUN" (i.e. a new line beginning with "BLASTN". | |
| 63 #2# If it isn't the first run, we have the save the previous "RUN" in the "bash1", before to re-initialize the RUN list (RUN =[]), before to append lines to the new "RUN" | |
| 64 | |
| 65 if RUN == '': # case #1# | |
| 66 RUN = RUN + nextline # we just added the first line of the file | |
| 67 | |
| 68 else: # case #2# (there was a run before) | |
| 69 BASH1[query] = RUN # add the previous run to the bash | |
| 70 RUN = '' # re-initialize the "RUN" | |
| 71 RUN = RUN + nextline # add the line starting with the keyword ("BLASTN") (except the first line of the file (the first "RUN") | |
| 72 | |
| 73 else: # Treatment of the subsequent lines of the one starting with the keyword ("BLASTN") (which is not treated here but previously) | |
| 74 RUN = RUN + nextline | |
| 75 | |
| 76 | |
| 77 if not nextline: # when no more line, we should record the last "RUN" in the bash1 | |
| 78 BASH1[query] = RUN # add the last "RUN" | |
| 79 break | |
| 80 | |
| 81 | |
| 82 file_in.close() | |
| 83 return(BASH1) | |
| 84 ######################################################### | |
| 85 | |
| 86 | |
| 87 ############################################### | |
| 88 ### DEF2 : Parse blast output for each query### | |
| 89 ############################################### | |
| 90 | |
| 91 ### 2.1. detect matches (i.e. 'Sequences producing significant alignments:' ### | |
| 92 def detect_Matches(query, MATCH, WORK_DIR): | |
| 93 F5 = open("%s/blastRun2.tmp" %WORK_DIR, 'w') | |
| 94 F5.write(bash1[query]) | |
| 95 F5.close() | |
| 96 | |
| 97 F6 = open("%s/blastRun2.tmp" %WORK_DIR, 'r') | |
| 98 list1 =[] | |
| 99 list2 =[] | |
| 100 | |
| 101 while 1: | |
| 102 nexteu = F6.readline() | |
| 103 | |
| 104 if not nexteu : break | |
| 105 | |
| 106 if "***** No hits found ******" in nexteu : | |
| 107 hit = 0 | |
| 108 break | |
| 109 | |
| 110 if 'Sequences producing significant alignments:' in nexteu: | |
| 111 hit = 1 | |
| 112 F6.readline() # jump a line | |
| 113 | |
| 114 while 1: | |
| 115 nexteu2 = F6.readline() | |
| 116 | |
| 117 if nexteu2[0]==">": break | |
| 118 | |
| 119 ###################################### | |
| 120 ### [B] FORMAT MATCH NAME 1st STEP ### | |
| 121 | |
| 122 if nexteu2 != '\n': | |
| 123 LL1 = string.split(nexteu2, " ") # specific NORTH database names !!!!!!! | |
| 124 match = LL1[0] #### SOUTH databank // NORTH will have "|" separators | |
| 125 list1.append(match) | |
| 126 | |
| 127 match2 = ">" + LL1[0] # more complete name // still specific NORTH database names !!!!!!! | |
| 128 list2.append(match2) #### SOUTH databank // NORTH will have "|" separators | |
| 129 | |
| 130 if MATCH == 0: ## Only read the 1rst line (i.e. the First Match) | |
| 131 break | |
| 132 else: ## Read the other lines (i.e. All the Matches) | |
| 133 continue | |
| 134 | |
| 135 ### [B] END FORMAT MATCH NAME 1st STEP ### | |
| 136 ########################################## | |
| 137 | |
| 138 break | |
| 139 | |
| 140 F6.close() | |
| 141 return(list1, list2, hit) # list1 = short name // list2 = more complete name | |
| 142 ###################################### | |
| 143 | |
| 144 | |
| 145 ######################################### | |
| 146 ### DEF3 : Get Information on matches ### | |
| 147 ######################################### | |
| 148 ### Function used in the next function (2.3.) | |
| 149 def get_information_on_matches(list_of_line): | |
| 150 | |
| 151 for line in list_of_line: | |
| 152 | |
| 153 ## Score and Expect | |
| 154 if "Score" in line: | |
| 155 #print line | |
| 156 line = line[:-1] # remove "\n" | |
| 157 S_line = string.split(line, " = ") | |
| 158 Expect = S_line[-1] ## ***** Expect | |
| 159 S_line2 = string.split(S_line[1], " bits ") | |
| 160 Score = string.atof(S_line2[0]) | |
| 161 | |
| 162 | |
| 163 ## Identities/gaps/percent/divergence/length_matched | |
| 164 elif "Identities" in line: | |
| 165 line = line[:-1] # remove "\n" | |
| 166 | |
| 167 g = 0 | |
| 168 if "Gaps" in line: | |
| 169 #print "HIT!!!" | |
| 170 pre_S_line = string.split(line, ",") | |
| 171 identity_line = pre_S_line[0] | |
| 172 gaps_line = pre_S_line[1] | |
| 173 g = 1 | |
| 174 else: | |
| 175 identity_line = line | |
| 176 g = 0 | |
| 177 | |
| 178 ## treat identity line | |
| 179 S_line = string.split(identity_line, " ") | |
| 180 | |
| 181 identities = S_line[-2] ## ***** identities | |
| 182 #print "\t\tIdentities = %s" %identities | |
| 183 | |
| 184 S_line2 = string.split(identities, "/") | |
| 185 hits = string.atof(S_line2[0]) ## ***** hits | |
| 186 length_matched = string.atof(S_line2[1]) ## ***** length_matched | |
| 187 abs_nb_differences = length_matched - hits ## ***** abs_nb_differences | |
| 188 | |
| 189 | |
| 190 ## identity_percent = S_line[-1] | |
| 191 identity_percent = hits/length_matched * 100 ## ***** identity_percent | |
| 192 #print "\t\tIdentity (percent) = %.2f" %identity_percent | |
| 193 | |
| 194 divergence_percent = abs_nb_differences/length_matched*100 ## ***** divergence_percent | |
| 195 #print "\t\tDivergence (percent) = %.2f" %divergence_percent | |
| 196 | |
| 197 ## treat gap line if any | |
| 198 if g ==1: # means there are gaps | |
| 199 S_line3 = string.split(gaps_line, " ") | |
| 200 gaps_part = S_line3[-2] | |
| 201 S_line4 = string.split(gaps_part, "/") | |
| 202 gaps_number = string.atoi(S_line4[0]) ## ***** gaps_number | |
| 203 #print "\t\tGaps number = %s" %gaps_number | |
| 204 | |
| 205 real_differences = abs_nb_differences - gaps_number ## ***** real_differences | |
| 206 real_divergence_percent = (real_differences/length_matched)*100 ## ***** real_divergence_percent | |
| 207 #print "\t\tReal divergence (percent)= %.2f" %real_divergence_percent | |
| 208 else: | |
| 209 gaps_number = 0 | |
| 210 #print "\t\tGaps number = %s" %gaps_number | |
| 211 real_differences = 0 | |
| 212 real_divergence_percent = divergence_percent | |
| 213 | |
| 214 ## Strand | |
| 215 #elif "Strand" in line: | |
| 216 # line = line[:-1] # remove "\n" | |
| 217 # S_line = string.split(line, " = ") | |
| 218 # strand = S_line[1] | |
| 219 # print "\t\tStrand = %s" %strand | |
| 220 | |
| 221 ## Frame | |
| 222 elif "Frame" in line: | |
| 223 line = line[:-1] # remove "\n" | |
| 224 S_line = string.split(line, " = ") | |
| 225 frame = S_line[1] | |
| 226 #print "\t\tFrame = %s" %frame | |
| 227 | |
| 228 | |
| 229 | |
| 230 list_informations=[length_matched, Expect, Score, identities, hits, identity_percent, divergence_percent,gaps_number, real_divergence_percent, frame, length_matched] | |
| 231 | |
| 232 return(list_informations) | |
| 233 ################################## | |
| 234 | 29 |
| 235 | 30 |
| 236 ############################ | 31 ############################ |
| 237 ### DEF4 : get sequences ### | 32 ### DEF4 : get sequences ### |
| 238 ############################ | 33 ############################ |
| 274 Listing1.append(text1) # "text1" = the first lines (begin with "BLASTN 2.2.1 ...]" | 69 Listing1.append(text1) # "text1" = the first lines (begin with "BLASTN 2.2.1 ...]" |
| 275 Listing1.reverse() | 70 Listing1.reverse() |
| 276 | 71 |
| 277 Listing2 = Listing1[1:] # remove the first thing ("BLASTN ...") and keep only table beginning with a line with ">" | 72 Listing2 = Listing1[1:] # remove the first thing ("BLASTN ...") and keep only table beginning with a line with ">" |
| 278 SEK = len(Listing2) | 73 SEK = len(Listing2) |
| 279 | |
| 280 NB_SEK = 0 | 74 NB_SEK = 0 |
| 281 | 75 |
| 282 for e1 in Listing2: # "Listing2" contents all the entries begining with ">" | 76 for e1 in Listing2: # "Listing2" contents all the entries begining with ">" |
| 283 NB_SEK = NB_SEK + 1 | 77 NB_SEK = NB_SEK + 1 |
| 284 list51 = [] | 78 list51 = [] |
| 288 l = l+1 | 82 l = l+1 |
| 289 if "Score =" in line: | 83 if "Score =" in line: |
| 290 index = l | 84 index = l |
| 291 list51.append(l) # index of the lines with score | 85 list51.append(l) # index of the lines with score |
| 292 | 86 |
| 293 list51.reverse() | 87 list51.reverse() |
| 294 | |
| 295 Listing3 = [] | 88 Listing3 = [] |
| 296 | 89 |
| 297 for i5 in list51: | 90 for i5 in list51: |
| 298 e2 = e1[i5:] | 91 e2 = e1[i5:] |
| 299 Listing3.append(e2) | 92 Listing3.append(e2) |
| 307 | 100 |
| 308 SmallFastaName = BigFastaName[0] ## First line <=> MATCH NAME | 101 SmallFastaName = BigFastaName[0] ## First line <=> MATCH NAME |
| 309 | 102 |
| 310 SmallFastaName = SmallFastaName[1:-2] ### remove ">" and "\n" | 103 SmallFastaName = SmallFastaName[1:-2] ### remove ">" and "\n" |
| 311 | 104 |
| 312 | 105 """ |
| 106 3 lines below : only difference with S05 | |
| 107 """ | |
| 313 S1 = string.split(SmallFastaName, "||") | 108 S1 = string.split(SmallFastaName, "||") |
| 314 S2 = string.split(S1[0], " ") | 109 S2 = string.split(S1[0], " ") |
| 315 | |
| 316 | |
| 317 PutInFastaName1 = S2[0] | 110 PutInFastaName1 = S2[0] |
| 318 | 111 |
| 319 ### [C] END FORMAT MATCH NAME 2nd STEP ### | 112 ### [C] END FORMAT MATCH NAME 2nd STEP ### |
| 320 ########################################## | 113 ########################################## |
| 321 | 114 |
| 322 SUBSEK = len(Listing3) | 115 SUBSEK = len(Listing3) |
| 323 NB_SUBSEK = 0 | 116 NB_SUBSEK = 0 |
| 324 list_inBatch = [] | 117 list_inBatch = [] |
| 118 | |
| 325 ### IF NO SUBMATCH WANTED !!!! => ONLY KEEP THE FIRST HIT OF "LISTING3": | 119 ### IF NO SUBMATCH WANTED !!!! => ONLY KEEP THE FIRST HIT OF "LISTING3": |
| 326 if SUBMATCHEU == 0: # NO SUBMATCH WANTED !!!! | 120 if SUBMATCHEU == 0: # NO SUBMATCH WANTED !!!! |
| 327 Listing4 = [] | 121 Listing4 = [] |
| 328 Listing4.append(Listing3[-1]) # Remove this line if submatch wanted!!! | 122 Listing4.append(Listing3[-1]) # Remove this line if submatch wanted!!! |
| 329 elif SUBMATCHEU == 1: | 123 elif SUBMATCHEU == 1: |
| 330 Listing4 = Listing3 | 124 Listing4 = Listing3 |
| 331 | 125 |
| 332 | 126 for l in Listing4: ## "listing3" contents |
| 333 for l in Listing4: ## "listing3" contents | |
| 334 | |
| 335 NB_SUBSEK = NB_SUBSEK+1 | 127 NB_SUBSEK = NB_SUBSEK+1 |
| 336 | 128 |
| 337 ll1 = string.replace(l[0], " ", "") | 129 ll1 = string.replace(l[0], " ", "") |
| 338 ll2 = string.replace(l[1], " ", "") | 130 ll2 = string.replace(l[1], " ", "") |
| 339 ll3 = string.replace(l[2], " ", "") | 131 ll3 = string.replace(l[2], " ", "") |
| 341 | 133 |
| 342 seq_query = "" | 134 seq_query = "" |
| 343 pos_query = [] | 135 pos_query = [] |
| 344 seq_match = "" | 136 seq_match = "" |
| 345 pos_match = [] | 137 pos_match = [] |
| 138 | |
| 346 for line in l: | 139 for line in l: |
| 347 if "Query:" in line: | 140 if "Query:" in line: |
| 348 line = string.replace(line, " ", " ") # remove multiple spaces in line | 141 line = string.replace(line, " ", " ") # remove multiple spaces in line |
| 349 line = string.replace(line, " ", " ") | 142 line = string.replace(line, " ", " ") |
| 350 line = string.replace(line, " ", " ") | 143 line = string.replace(line, " ", " ") |
| 358 pos2 = lll1[3][:-1] | 151 pos2 = lll1[3][:-1] |
| 359 pos2 = string.atoi(pos2) | 152 pos2 = string.atoi(pos2) |
| 360 pos_query.append(pos2) | 153 pos_query.append(pos2) |
| 361 | 154 |
| 362 seq = lll1[2] | 155 seq = lll1[2] |
| 363 seq_query = seq_query + seq | 156 seq_query = seq_query + seq |
| 364 | |
| 365 | 157 |
| 366 if "Sbjct:" in line: | 158 if "Sbjct:" in line: |
| 367 line = string.replace(line, " ", " ") # remove multiple spaces in line | 159 line = string.replace(line, " ", " ") # remove multiple spaces in line |
| 368 line = string.replace(line, " ", " ") | 160 line = string.replace(line, " ", " ") |
| 369 line = string.replace(line, " ", " ") | 161 line = string.replace(line, " ", " ") |
| 379 pos_match.append(pos2) | 171 pos_match.append(pos2) |
| 380 | 172 |
| 381 seq = lll2[2] | 173 seq = lll2[2] |
| 382 seq_match = seq_match + seq | 174 seq_match = seq_match + seq |
| 383 | 175 |
| 384 ## Get the query and matched sequences and the corresponding positions | 176 ## Get the query and matched sequences and the corresponding positions |
| 385 | |
| 386 pos_query.sort() # rank small to big | 177 pos_query.sort() # rank small to big |
| 387 pos_query_start = pos_query[0] # get the smaller | 178 pos_query_start = pos_query[0] # get the smaller |
| 388 pos_query_end = pos_query[-1] # get the bigger | 179 pos_query_end = pos_query[-1] # get the bigger |
| 389 PutInFastaName3 = "%d...%d" %(pos_query_start, pos_query_end) | 180 PutInFastaName3 = "%d...%d" %(pos_query_start, pos_query_end) |
| 390 | 181 |
| 391 | |
| 392 ###################################### | 182 ###################################### |
| 393 ### [D] FORMAT QUERY NAME 2nd STEP ### | 183 ### [D] FORMAT QUERY NAME 2nd STEP ### |
| 394 | 184 |
| 395 FINAL_fasta_Name_Query = ">" + query + "||"+ PutInFastaName3 + "||[[%d/%d]][[%d/%d]]" %(NB_SEK, SEK, NB_SUBSEK,SUBSEK) | 185 FINAL_fasta_Name_Query = ">" + query + "||"+ PutInFastaName3 + "||[[%d/%d]][[%d/%d]]" %(NB_SEK, SEK, NB_SUBSEK,SUBSEK) |
| 396 | 186 |
| 397 ### [D] END FORMAT QUERY NAME 2nd STEP ### | 187 ### [D] END FORMAT QUERY NAME 2nd STEP ### |
| 398 ########################################## | 188 ########################################## |
| 399 | |
| 400 | 189 |
| 401 pos_match.sort() | 190 pos_match.sort() |
| 402 pos_match_start = pos_match[0] | 191 pos_match_start = pos_match[0] |
| 403 pos_match_end = pos_match[-1] | 192 pos_match_end = pos_match[-1] |
| 404 PutInFastaName4 = "%d...%d" %(pos_match_start, pos_match_end) | 193 PutInFastaName4 = "%d...%d" %(pos_match_start, pos_match_end) |
| 405 | 194 |
| 406 | |
| 407 ###################################### | 195 ###################################### |
| 408 ### [E] FORMAT MATCH NAME 3rd STEP ### | 196 ### [E] FORMAT MATCH NAME 3rd STEP ### |
| 409 | 197 |
| 410 FINAL_fasta_Name_Match = ">" + PutInFastaName1 + "||" + PutInFastaName4 + "||[[%d/%d]][[%d/%d]]" %(NB_SEK, SEK, NB_SUBSEK,SUBSEK) | 198 FINAL_fasta_Name_Match = ">" + PutInFastaName1 + "||" + PutInFastaName4 + "||[[%d/%d]][[%d/%d]]" %(NB_SEK, SEK, NB_SUBSEK,SUBSEK) |
| 411 | 199 |
| 412 ### [E] END FORMAT MATCH NAME 3rd STEP ### | 200 ### [E] END FORMAT MATCH NAME 3rd STEP ### |
| 413 ########################################## | 201 ########################################## |
| 414 | 202 |
| 415 | |
| 416 | |
| 417 Pairwise = [FINAL_fasta_Name_Query , seq_query , FINAL_fasta_Name_Match , seq_match] # list with 4 members | 203 Pairwise = [FINAL_fasta_Name_Query , seq_query , FINAL_fasta_Name_Match , seq_match] # list with 4 members |
| 418 list_Pairwise.append(Pairwise) | 204 list_Pairwise.append(Pairwise) |
| 419 | 205 |
| 420 ### Get informations about matches | 206 ### Get informations about matches |
| 421 list_info = get_information_on_matches(l) ### DEF3 ### | 207 list_info = get_information_on_matches(l) ### DEF3 ### |
| 422 | |
| 423 | 208 |
| 424 F8.close() | 209 F8.close() |
| 425 return(list_Pairwise, list_info) | 210 return(list_Pairwise, list_info) |
| 426 ######################################### | 211 ######################################### |
| 427 | 212 |
| 428 | 213 |
| 429 ################### | 214 ################### |
| 430 ### RUN RUN RUN ### | 215 ### RUN RUN RUN ### |
| 431 ################### | 216 ################### |
| 432 import string, os, time, re, sys | 217 import string, os, time, re, sys |
| 218 from functions import split_file, detect_Matches, get_information_on_matches | |
| 433 | 219 |
| 434 ## 1 ## INPUT/OUTPUT | 220 ## 1 ## INPUT/OUTPUT |
| 435 SHORT_FILE = sys.argv[1] ## short-name-query_short-name-db | 221 SHORT_FILE = sys.argv[1] ## short-name-query_short-name-db |
| 436 | 222 |
| 223 """ | |
| 224 04 and 06 for S05, 11 and 13 for S08 | |
| 225 """ | |
| 437 path_in = "%s/11_outputBlast_%s.txt" %(SHORT_FILE, SHORT_FILE) | 226 path_in = "%s/11_outputBlast_%s.txt" %(SHORT_FILE, SHORT_FILE) |
| 438 file_out = open("%s/13_PairwiseMatch_%s.fasta" %(SHORT_FILE, SHORT_FILE),"w") | 227 file_out = open("%s/13_PairwiseMatch_%s.fasta" %(SHORT_FILE, SHORT_FILE),"w") |
| 439 | 228 |
| 440 ## 2 ## RUN | 229 ## 2 ## RUN |
| 441 ## create Bash1 ## | 230 ## create Bash1 ## |
| 442 bash1 = split_file(path_in, "TBLASTX") ### DEF1 ### | 231 bash1 = split_file(path_in, "TBLASTX") ### DEF1 ### |
| 443 | 232 |
| 444 ## detect and save match ## | 233 ## detect and save match ## |
| 445 list_hits =[] | 234 list_hits =[] |
| 446 list_no_hits = [] | 235 list_no_hits = [] |
| 447 j= 0 | 236 j= 0 |
| 449 lene = len(bash1.keys()) | 238 lene = len(bash1.keys()) |
| 450 for query in bash1.keys(): | 239 for query in bash1.keys(): |
| 451 j = j+1 | 240 j = j+1 |
| 452 | 241 |
| 453 ## 2.1. detect matches ## | 242 ## 2.1. detect matches ## |
| 454 list_match, list_match2, hit=detect_Matches(query, MATCH, SHORT_FILE) ### DEF2 ### | 243 list_match, list_match2, hit=detect_Matches(query, MATCH, SHORT_FILE, bash1) ### DEF2 ### |
| 455 | 244 |
| 456 if hit == 1: # match(es) | 245 if hit == 1: # match(es) |
| 457 list_hits.append(query) | 246 list_hits.append(query) |
| 458 if hit == 0: # no match for that sequence | 247 if hit == 0: # no match for that sequence |
| 459 list_no_hits.append(query) | 248 list_no_hits.append(query) |
| 460 | 249 |
| 461 ## 2.2. get sequences ## | 250 ## 2.2. get sequences ## |
| 462 if hit ==1: | 251 if hit ==1: |
| 463 list_pairwiseMatch, list_info = get_sequences(query, list_match2, SUBMATCH, SHORT_FILE) ### DEF4 ### | 252 list_pairwiseMatch, list_info = get_sequences(query, list_match2, SUBMATCH, SHORT_FILE) |
| 464 | 253 |
| 465 # divergencve | 254 # divergencve |
| 466 divergence = list_info[6] | 255 divergence = list_info[6] |
| 467 # gap number | 256 # gap number |
| 468 gap_number = list_info[7] | 257 gap_number = list_info[7] |
| 480 match_name = pairwise[2] | 269 match_name = pairwise[2] |
| 481 match_seq = pairwise[3] | 270 match_seq = pairwise[3] |
| 482 | 271 |
| 483 len_query_seq = len(query_seq) | 272 len_query_seq = len(query_seq) |
| 484 | 273 |
| 485 | |
| 486 # If NO CONTROL FOR LENGTH, USE THE FOLLOWING LINES INSTEAD: | 274 # If NO CONTROL FOR LENGTH, USE THE FOLLOWING LINES INSTEAD: |
| 487 | 275 |
| 488 file_out.write("%s||%s||%s||%s||%s" %(query_name,divergence,gap_number,real_divergence,length_matched)) | 276 file_out.write("%s||%s||%s||%s||%s" %(query_name,divergence,gap_number,real_divergence,length_matched)) |
| 489 file_out.write("\n") | 277 file_out.write("\n") |
| 490 file_out.write("%s" %query_seq) | 278 file_out.write("%s" %query_seq) |
