annotate scripts/functions.py @ 6:fe74cf0d4e7a draft

planemo upload for repository htpps://github.com/abims-sbr/adaptearch commit 7c9540507076087e858141f9dd5df33409690dac
author abims-sbr
date Wed, 28 Feb 2018 06:06:14 -0500
parents 988467f963f0
children f1e24200e5ae
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
2
988467f963f0 planemo upload for repository htpps://github.com/abims-sbr/adaptearch commit cf1b9c905931ca2ca25faa4844d45c908756472f
abims-sbr
parents:
diff changeset
def simplify_fasta_name(fasta_name, LT):
    """Reduce a FASTA header to the abbreviation it contains.

    Args:
        fasta_name: FASTA record name (header text without the leading ">").
        LT: iterable of abbreviation strings to look for inside fasta_name.

    Returns:
        The abbreviation from LT that occurs as a substring of fasta_name.
        When several abbreviations match, the last one in LT order is
        returned. When none matches (or LT is empty), fasta_name is returned
        unchanged instead of failing with UnboundLocalError.
    """
    # Default to the untouched name so an unmatched header cannot leave the
    # result variable unassigned.
    new_fasta_name = fasta_name
    for abbreviation in LT:
        if abbreviation in fasta_name:
            # No break on purpose: a later match overrides an earlier one.
            new_fasta_name = abbreviation

    return new_fasta_name
988467f963f0 planemo upload for repository htpps://github.com/abims-sbr/adaptearch commit cf1b9c905931ca2ca25faa4844d45c908756472f
abims-sbr
parents:
diff changeset
7
988467f963f0 planemo upload for repository htpps://github.com/abims-sbr/adaptearch commit cf1b9c905931ca2ca25faa4844d45c908756472f
abims-sbr
parents:
diff changeset
def dico(fasta_file, LT):
    """Load a FASTA file into a dict of simplified name -> sequence.

    Only the single line that follows each ">" header is taken as the
    sequence, so the file is expected to hold one sequence line per record.
    Sequences are stored as read: gap characters are NOT removed here.

    Args:
        fasta_file: path of the FASTA file to read.
        LT: iterable of abbreviations, forwarded to simplify_fasta_name() to
            shorten each header.

    Returns:
        dict mapping each simplified header to its sequence line (trailing
        line ending stripped). A file without any header yields an empty dict.
    """
    bash1 = {}

    # The context manager closes the file even if parsing raises.
    with open(fasta_file, "r") as handle:
        for line in handle:
            if not line.startswith(">"):
                continue

            # Strip only the line ending, so a final line without a trailing
            # newline does not lose its last character.
            fasta_name = line[1:].rstrip("\r\n")
            # The sequence is whatever line follows the header; a header on
            # the very last line gets an empty sequence.
            sequence = next(handle, "").rstrip("\r\n")

            # NOTE(review): the membership test uses the raw header while the
            # keys are simplified names, so two records sharing an
            # abbreviation overwrite each other silently. Kept as-is to avoid
            # changing which record wins; confirm the intended behavior.
            if fasta_name not in bash1:
                fasta_name = simplify_fasta_name(fasta_name, LT)
                bash1[fasta_name] = sequence
            else:
                # Single-argument call form prints identically on Python 2
                # and Python 3.
                print(fasta_name)

    return bash1