comparison scripts/S09_post_processing_of_pairwise.py @ 4:6709645eff5d draft

planemo upload for repository https://github.com/abims-sbr/adaptsearch commit cf1b9c905931ca2ca25faa4844d45c908756472f
author abims-sbr
date Wed, 17 Jan 2018 08:53:53 -0500
parents c8af52875b0f
children
comparison
equal deleted inserted replaced
3:5f68b2fc02c1 4:6709645eff5d
2 ## AUTHOR: Eric Fontanillas 2 ## AUTHOR: Eric Fontanillas
3 ## LAST VERSION: 14/08/14 by Julie BAFFARD 3 ## LAST VERSION: 14/08/14 by Julie BAFFARD
4 4
5 MINIMUM_LENGTH = 1 5 MINIMUM_LENGTH = 1
6 6
7 ############################
8 ##### DEF1 : Get Pairs #####
9 ############################
def get_pairs(fasta_file_path):
    """Read a pairwise FASTA file and return its query/match records.

    The file is scanned line by line. Every line starting with ">" is taken
    as a query header, and the three lines that follow it are read as the
    query sequence, the match header and the match sequence. Lines that do
    not start with ">" at the scanning position (e.g. blank separator lines
    between records) are skipped.

    Parameters:
        fasta_file_path: path of the FASTA file to read.

    Returns:
        A list of ``[query_name, query_seq, match_name, match_seq]`` lists,
        in file order, with line terminators removed.
    """
    list_pairwises = []

    # The context manager closes the file even if reading raises.
    with open(fasta_file_path, "r") as fasta_file:
        while True:
            header = fasta_file.readline()
            if not header:
                break  # end of file
            if not header.startswith(">"):
                continue  # separator / stray line between records

            # Strip only the line terminator: slicing with [:-1] would drop
            # the last residue of a final line that has no trailing newline.
            fasta_name_query = header.rstrip("\r\n")
            fasta_seq_query = fasta_file.readline().rstrip("\r\n")
            fasta_name_match = fasta_file.readline().rstrip("\r\n")
            fasta_seq_match = fasta_file.readline().rstrip("\r\n")

            ## Every record is kept; filtering is left to the caller
            list_pairwises.append(
                [fasta_name_query, fasta_seq_query,
                 fasta_name_match, fasta_seq_match]
            )

    return list_pairwises
31 ##############################################
32
33
34 #################################
35 ##### DEF2 : Extract length #####
36 #################################
def extract_length(length_string):  # format length string = 57...902
    """Return the span encoded in a ``"<start>...<end>"`` string.

    Parameters:
        length_string: text made of two integers separated by "...",
            for example ``"57...902"``.

    Returns:
        ``end - start`` as an integer (845 for ``"57...902"``).

    Raises:
        ValueError: if either bound is not an integer literal.
        IndexError: if the "..." separator is missing.
    """
    # str methods and int() replace string.split / string.atoi, which only
    # exist in Python 2 and required the `string` module to be imported.
    bounds = length_string.split("...")
    start = int(bounds[0])
    end = int(bounds[1])
    return end - start
43 ##############################################
44
45
46 ####################################
47 ##### DEF3 : Remove Redundancy #####
48 ####################################
def filter_redondancy(list_paireu, MIN_LENGTH):
    """Keep one pair per query/match name combination, then filter by length.

    Each pair's headers are split on "||". The first field, minus its
    leading character, is the short name; the second field of the query
    header is passed to ``extract_length`` to get the matched length.
    Pairs sharing the same ``"<short_query>_<short_match>"`` key are
    reduced to the one with the greatest matched length (the first one seen
    wins on ties).

    Parameters:
        list_paireu: iterable of ``[query_name, query_seq, match_name,
            match_seq]`` sequences.
        MIN_LENGTH: threshold; only pairs whose matched length is strictly
            greater than this value are returned.

    Returns:
        A list of ``[query_name, query_seq, match_name, match_seq,
        length_matched]`` lists, in the dictionary's iteration order.
    """
    best_by_binom = {}

    for pair in list_paireu:
        query_name = pair[0]
        query_seq = pair[1]
        match_name = pair[2]
        match_seq = pair[3]

        # str.split replaces the Python 2-only string.split helper.
        query_fields = query_name.split("||")
        short_query_name = query_fields[0][1:]  # drop the leading ">"
        length_matched = extract_length(query_fields[1])  ### DEF2 ###
        short_match_name = match_name.split("||")[0][1:]
        binom = "%s_%s" % (short_query_name, short_match_name)

        # Direct dict lookup: `binom in d.keys()` builds and scans a list on
        # every iteration under Python 2.
        kept = best_by_binom.get(binom)
        if kept is None or length_matched > kept[-1]:
            best_by_binom[binom] = [
                query_name, query_seq, match_name, match_seq, length_matched
            ]

    # The matched length is stored as the last item of every entry.
    return [
        entry for entry in best_by_binom.values() if entry[-1] > MIN_LENGTH
    ]
82 ##############################################
83
84
85 ####################### 7 #######################
86 ##### RUN RUN RUN ##### 8 ##### RUN RUN RUN #####
87 ####################### 9 #######################
88 import string, os, time, re, sys 10 import string, os, time, re, sys
11 from functions import get_pairs, extract_length, filter_redondancy
89 12
90 ## 1 ## INPUT/OUTPUT 13 ## 1 ## INPUT/OUTPUT
91 SHORT_FILE = sys.argv[1] ## short-name-query_short-name-db 14 SHORT_FILE = sys.argv[1] ## short-name-query_short-name-db
92 15
93 F_IN = "%s/13_PairwiseMatch_%s.fasta" %(SHORT_FILE, SHORT_FILE) 16 F_IN = "%s/13_PairwiseMatch_%s.fasta" %(SHORT_FILE, SHORT_FILE)