annotate scripts/functions.py @ 7:f1e24200e5ae draft

planemo upload for repository https://github.com/abims-sbr/adaptearch commit b7a3030ea134b5dfad89b1a869db659d72d1145c
author abims-sbr
date Wed, 28 Feb 2018 10:39:41 -0500
parents 988467f963f0
children f62c76aab669
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
7
f1e24200e5ae planemo upload for repository htpps://github.com/abims-sbr/adaptearch commit b7a3030ea134b5dfad89b1a869db659d72d1145c
abims-sbr
parents: 2
diff changeset
1 import itertools
f1e24200e5ae planemo upload for repository htpps://github.com/abims-sbr/adaptearch commit b7a3030ea134b5dfad89b1a869db659d72d1145c
abims-sbr
parents: 2
diff changeset
2
2
988467f963f0 planemo upload for repository htpps://github.com/abims-sbr/adaptearch commit cf1b9c905931ca2ca25faa4844d45c908756472f
abims-sbr
parents:
diff changeset
def simplify_fasta_name(fasta_name,LT):
    """Reduce a FASTA header to the abbreviation it contains.

    Args:
        fasta_name: header text to simplify.
        LT: iterable of abbreviation strings to search for.

    Returns:
        The last abbreviation of ``LT`` that occurs as a substring of
        ``fasta_name``, or ``fasta_name`` itself when none occurs.
    """
    # Start from the untouched name so that a header matching no
    # abbreviation (or an empty LT) is returned as-is instead of raising
    # UnboundLocalError on the return statement.
    new_fasta_name = fasta_name
    for abbreviation in LT:
        if abbreviation in fasta_name:
            # Deliberately no break: when several abbreviations match,
            # the last one in LT wins.
            new_fasta_name = abbreviation

    return new_fasta_name
988467f963f0 planemo upload for repository htpps://github.com/abims-sbr/adaptearch commit cf1b9c905931ca2ca25faa4844d45c908756472f
abims-sbr
parents:
diff changeset
9
988467f963f0 planemo upload for repository htpps://github.com/abims-sbr/adaptearch commit cf1b9c905931ca2ca25faa4844d45c908756472f
abims-sbr
parents:
diff changeset
## Generates bash, with key = simplified fasta name; value = sequence
## (the sequence is stored as read: gaps, if any, are NOT removed here)
def dico(fasta_file,LT):
    """Load a two-lines-per-record FASTA file into a dictionary.

    The file is read as consecutive (header, sequence) line pairs. A pair
    whose first line does not start with ">" is skipped entirely.

    Args:
        fasta_file: path of the FASTA file to read.
        LT: iterable of abbreviations handed to ``simplify_fasta_name`` to
            shorten each header.

    Returns:
        A dict mapping each simplified header to its sequence line with the
        trailing line break removed. An empty or header-less file gives an
        empty dict. When two records simplify to the same name, the first
        one is kept and the header of the later one is printed.
    """
    bash1 = {}
    with open(fasta_file, "r") as handle:
        while True:
            # Records are read as explicit line pairs; readline() returns
            # "" at end of file, on Python 2 and Python 3 alike.
            name = handle.readline()
            if not name:
                break
            query = handle.readline()

            if name[0] != ">":
                continue

            # Strip only the line break, so a final line that lacks one
            # does not lose its last character; a header that is the very
            # last line of the file gets an empty sequence.
            raw_name = name.rstrip("\r\n")[1:]
            sequence = query.rstrip("\r\n")

            # Simplify BEFORE the membership test: the dict is keyed by
            # simplified names, so testing the raw header would never
            # detect a duplicate.
            fasta_name = simplify_fasta_name(raw_name, LT)
            if fasta_name not in bash1:
                bash1[fasta_name] = sequence
            else:
                # Report the duplicated record by its full header.
                print(raw_name)

    return bash1