comparison scripts/S01b_study_seq_composition_aa.py @ 2:988467f963f0 draft

planemo upload for repository https://github.com/abims-sbr/adaptearch commit cf1b9c905931ca2ca25faa4844d45c908756472f
author abims-sbr
date Wed, 17 Jan 2018 08:57:49 -0500
parents 8de21b6eb110
children 5766f80370e7
comparison
equal deleted inserted replaced
1:8de21b6eb110 2:988467f963f0
1 #!/usr/bin/env python 1 #!/usr/bin/env python
2 # -*- coding: ascii -*- 2 # -*- coding: ascii -*-
3 ## Author: Eric FONTANILLAS 3 ## Author: Eric FONTANILLAS
4 ## Date: 21.12.10 4 ## Date: 21.12.10
5 ## Last Version : 12/2017 by Victor Mataigne
5 ## Object: Test for compositional bias in genome and proteome as marker of thermal adaptation (comparison between 2 "hot" species: Ap and Ps and two "cold" species: Pg, Pp) 6 ## Object: Test for compositional bias in genome and proteome as marker of thermal adaptation (comparison between 2 "hot" species: Ap and Ps and two "cold" species: Pg, Pp)
7
6 import sys,os,shutil,subprocess,string 8 import sys,os,shutil,subprocess,string
9 from functions import simplify_fasta_name, dico
10
7 script_path = os.path.dirname(sys.argv[0]) 11 script_path = os.path.dirname(sys.argv[0])
8
9 #############
10 ### DEF 0 ###
11 #############
12 def simplify_fasta_name(fasta_name,LT):
13 for abbreviation in LT:
14 if abbreviation in fasta_name:
15 new_fasta_name = abbreviation
16
17 return(new_fasta_name)
18 ##########################################
19
20 ###########
21 ## DEF1 ##
22 ###########
23 ## Generates bash, with key = fasta name; value = sequence (WITH GAP, IF ANY, REMOVED IN THIS FUNCTION)
24
25 def dico(fasta_file,LT):
26
27 count_fastaName=0
28 F1 = open(fasta_file, "r")
29
30 bash1 = {}
31 while 1:
32 nextline = F1.readline()
33 #print nextline
34 if not nextline :
35 break
36
37 if nextline[0] == ">":
38 count_fastaName = count_fastaName + 1
39 fasta_name = nextline[1:-1]
40 nextline = F1.readline()
41 sequence = nextline[:-1]
42
43 if fasta_name not in bash1.keys():
44 fasta_name = simplify_fasta_name(fasta_name,LT) ### DEF 0 ###
45 bash1[fasta_name] = sequence
46 else:
47 print fasta_name
48
49 # Find alignment length
50 kk = bash1.keys()
51 key0 = kk[0]
52 seq0 = bash1[key0]
53 ln_seq = len(seq0)
54
55 F1.close()
56
57 return(bash1)
58 #####################################
59
60
61 12
62 ################## 13 ##################
63 ###### DEF2 ###### 14 ###### DEF2 ######
64 ################## 15 ##################
65 def base_composition(seq): 16 def base_composition(seq):
591 fileOUT_PAYRE.write("%.5f,%.5f,%.5f,%.5f,%.5f,%.5f,%.5f,%.5f," %(count_PAYRE,prop_PAYRE,count_AC,prop_AC,count_MVGDS,prop_MVGDS,ratio_PAYRE_vs_MVGDS,ratio_AC_vs_MVGDS)) 542 fileOUT_PAYRE.write("%.5f,%.5f,%.5f,%.5f,%.5f,%.5f,%.5f,%.5f," %(count_PAYRE,prop_PAYRE,count_AC,prop_AC,count_MVGDS,prop_MVGDS,ratio_PAYRE_vs_MVGDS,ratio_AC_vs_MVGDS))
592 fileOUT_TotalResidueWeight.write("%.5f," %Total_Residue_Weight) 543 fileOUT_TotalResidueWeight.write("%.5f," %Total_Residue_Weight)
593 fileOUT_TotalResidueVolume.write("%.5f," %Total_Residue_Volume) 544 fileOUT_TotalResidueVolume.write("%.5f," %Total_Residue_Volume)
594 fileOUT_TotalPartialSpecificVolume.write("%.5f," %(Total_Partial_Specific_Volume)) 545 fileOUT_TotalPartialSpecificVolume.write("%.5f," %(Total_Partial_Specific_Volume))
595 fileOUT_TotalHydratation.write("%.5f," % Total_Hydration) 546 fileOUT_TotalHydratation.write("%.5f," % Total_Hydration)
547 else:
548 fileOUT_PROT_ALL.write("%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s," %("n.a","n.a","n.a","n.a","n.a","n.a","n.a","n.a","n.a","n.a","n.a","n.a","n.a","n.a","n.a","n.a","n.a","n.a","n.a","n.a"))
549 fileOUT_IVYWREL.write("%s,%s," %("n.a", "n.a"))
550 fileOUT_ERK_DNQTSH.write("%s,%s,%s,%s,%s," %("n.a","n.a","n.a","n.a","n.a"))
551 fileOUT_EK_QH.write("%s,%s,%s,%s,%s," %("n.a","n.a","n.a","n.a","n.a"))
552 fileOUT_FYMINK_GARP.write("%s,%s,%s,%s," %("n.a","n.a","n.a","n.a"))
553 fileOUT_AVLIMFYW.write("%s,%s,%s,%s,%s,%s," %("n.a","n.a","n.a","n.a","n.a","n.a"))
554 fileOUT_STNQ.write("%s,%s," %("n.a","n.a"))
555 fileOUT_RHKDE.write("%s,%s,%s,%s,%s,%s,"%("n.a","n.a","n.a","n.a","n.a","n.a"))
556 fileOUT_PAYRE.write("%s,%s,%s,%s,%s,%s,%s,%s," %("n.a","n.a","n.a","n.a","n.a","n.a","n.a","n.a"))
557 fileOUT_TotalResidueWeight.write("%s," %"n.a")
558 fileOUT_TotalResidueVolume.write("%s," %"n.a")
559 fileOUT_TotalPartialSpecificVolume.write("%s," %"n.a")
560 fileOUT_TotalHydratation.write("%s," %"n.a")
596 561
597 ## END LINE 562 ## END LINE
598 fileOUT_PROT_ALL.write("\n") 563 fileOUT_PROT_ALL.write("\n")
599 fileOUT_IVYWREL.write("\n") 564 fileOUT_IVYWREL.write("\n")
600 fileOUT_ERK_DNQTSH.write("\n") 565 fileOUT_ERK_DNQTSH.write("\n")
607 fileOUT_TotalResidueWeight.write("\n") 572 fileOUT_TotalResidueWeight.write("\n")
608 fileOUT_TotalResidueVolume.write("\n") 573 fileOUT_TotalResidueVolume.write("\n")
609 fileOUT_TotalPartialSpecificVolume.write("\n") 574 fileOUT_TotalPartialSpecificVolume.write("\n")
610 fileOUT_TotalHydratation.write("\n") 575 fileOUT_TotalHydratation.write("\n")
611 576
612
613
614
615