annotate scripts/S05_find_rbh.py @ 11:ae65822c1fb7 draft default tip

planemo upload for repository https://github.com/abims-sbr/adaptsearch commit 3c7982d775b6f3b472f6514d791edcb43cd258a1
author lecorguille
date Mon, 24 Sep 2018 03:56:40 -0400
parents 1e0c547d88fe
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
9
1e0c547d88fe planemo upload for repository https://github.com/abims-sbr/adaptsearch commit f1ba8d136e0129f3e8435b25a95f70f697d51464-dirty
abims-sbr
parents:
diff changeset
1 #!/usr/bin/env python
1e0c547d88fe planemo upload for repository https://github.com/abims-sbr/adaptsearch commit f1ba8d136e0129f3e8435b25a95f70f697d51464-dirty
abims-sbr
parents:
diff changeset
2 # coding: utf-8
1e0c547d88fe planemo upload for repository https://github.com/abims-sbr/adaptsearch commit f1ba8d136e0129f3e8435b25a95f70f697d51464-dirty
abims-sbr
parents:
diff changeset
3 # Author : Victor Mataigne
1e0c547d88fe planemo upload for repository https://github.com/abims-sbr/adaptsearch commit f1ba8d136e0129f3e8435b25a95f70f697d51464-dirty
abims-sbr
parents:
diff changeset
4
1e0c547d88fe planemo upload for repository https://github.com/abims-sbr/adaptsearch commit f1ba8d136e0129f3e8435b25a95f70f697d51464-dirty
abims-sbr
parents:
diff changeset
5 import argparse, pickle, itertools
1e0c547d88fe planemo upload for repository https://github.com/abims-sbr/adaptsearch commit f1ba8d136e0129f3e8435b25a95f70f697d51464-dirty
abims-sbr
parents:
diff changeset
6
1e0c547d88fe planemo upload for repository https://github.com/abims-sbr/adaptsearch commit f1ba8d136e0129f3e8435b25a95f70f697d51464-dirty
abims-sbr
parents:
diff changeset
def _read_fasta_pairs(path):
    """Read a two-lines-per-record FASTA file into a dict.

    The file is consumed two lines at a time: the first line of each pair is
    taken as the header (leading/trailing '>' and newline characters are
    stripped) and the second as the sequence (newlines stripped).

    Args:
        path: path of the FASTA file to read.

    Returns:
        dict mapping each header to its sequence. If a header occurs more
        than once, the last occurrence wins.
    """
    # izip_longest was renamed zip_longest in Python 3; support both.
    try:
        zip_longest = itertools.zip_longest
    except AttributeError:
        zip_longest = itertools.izip_longest

    records = {}
    with open(path, 'r') as handle:
        # zip(*[handle] * 2) pulls two consecutive lines per iteration from
        # the same file iterator. fillvalue='' keeps an unpaired last line
        # from yielding None (which would crash on .strip()).
        for head_line, seq_line in zip_longest(*[handle] * 2, fillvalue=''):
            header = head_line.strip('>\n')
            if not header:
                # Blank line (typically a trailing newline): nothing to keep.
                continue
            records[header] = seq_line.strip('\n')
    return records


def main():
    """Write the reciprocal best hits (RBH) of two best-hit sets to a FASTA file.

    Command-line arguments:
        besthits_file1: FASTA file holding the sequences of the first set.
        besthits_file2: FASTA file holding the sequences of the second set.

    Two pickled dicts are read from the current working directory,
    'dict_best_hits_from_blast_1' and 'dict_best_hits_from_blast_2'. A pair
    (a, b) is kept when the first dict maps a -> b and the second maps b -> a.

    For every kept pair, four lines are written to 'RBH_<suffix>_dna.fasta':
    '>a', the sequence of a from besthits_file1, '>b', the sequence of b from
    besthits_file2. <suffix> is built from the 5th and 6th '_'-separated
    fields of the besthits_file1 argument.

    Raises:
        IndexError: if besthits_file1 has fewer than six '_'-separated fields.
        KeyError: if a reciprocal hit name is absent from its FASTA file.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('besthits_file1', help='')
    parser.add_argument('besthits_file2', help='')
    args = parser.parse_args()

    # Open dict of best hits.
    # Binary mode is required by pickle on Python 3 and is harmless on 2.
    # NOTE(review): pickle.load executes arbitrary code from the file; these
    # files must only ever come from the trusted previous pipeline step.
    with open('dict_best_hits_from_blast_1', 'rb') as pickled_q:
        best_hit_dict_q = pickle.load(pickled_q)
    with open('dict_best_hits_from_blast_2', 'rb') as pickled_db:
        best_hit_dict_db = pickle.load(pickled_db)

    best_h1 = _read_fasta_pairs(args.besthits_file1)
    best_h2 = _read_fasta_pairs(args.besthits_file2)

    # Find RBH: invert the second dict so both are keyed the same way, then
    # keep the (key, value) pairs present in both.
    reverse_best_hit_dict_db = dict(
        (v, k) for k, v in best_hit_dict_db.items())
    rbh = set(best_hit_dict_q.items()).intersection(
        reverse_best_hit_dict_db.items())

    s = args.besthits_file1.split('_')
    suffix = s[4] + '_' + s[5]
    out_name = 'RBH_{}_dna.fasta'.format(suffix)

    with open(out_name, 'w') as output:
        # Set iteration order is arbitrary; sort so the output is reproducible.
        for name_1, name_2 in sorted(rbh):
            output.write('>' + name_1 + '\n')
            output.write(best_h1[name_1] + '\n')
            output.write('>' + name_2 + '\n')
            output.write(best_h2[name_2] + '\n')


if __name__ == "__main__":
    main()