comparison scripts/S01b_study_seq_composition_aa.py @ 1:8de21b6eb110 draft

planemo upload for repository https://github.com/abims-sbr/adaptearch commit 44a89d5eeb82789bfc643b33c11f391281b6374b
author abims-sbr
date Wed, 27 Sep 2017 10:04:08 -0400
parents 78dd6454f6f0
children 988467f963f0
comparison
equal deleted inserted replaced
0:78dd6454f6f0 1:8de21b6eb110
1 #!/usr/bin/env python 1 #!/usr/bin/env python
2 # -*- coding: ascii -*- 2 # -*- coding: ascii -*-
3 ## Author: Eric FONTANILLAS 3 ## Author: Eric FONTANILLAS
4 ## Date: 21.12.10 4 ## Date: 21.12.10
5 ## Object: Test for compositional bias in genome and proteome as marker of thermal adaptation (comparison between 2 "hot" species: Ap and Ps and two "cold" species: Pg, Pp) 5 ## Object: Test for compositional bias in genome and proteome as marker of thermal adaptation (comparison between 2 "hot" species: Ap and Ps and two "cold" species: Pg, Pp)
6 import sys, os 6 import sys,os,shutil,subprocess,string
7 script_path = os.path.dirname(sys.argv[0]) 7 script_path = os.path.dirname(sys.argv[0])
8 8
9 ############# 9 #############
10 ### DEF 0 ### 10 ### DEF 0 ###
11 ############# 11 #############
12 def simplify_fasta_name(fasta_name,LT): 12 def simplify_fasta_name(fasta_name,LT):
13
14 for abbreviation in LT: 13 for abbreviation in LT:
15 if abbreviation in fasta_name: 14 if abbreviation in fasta_name:
16 new_fasta_name = abbreviation 15 new_fasta_name = abbreviation
17 16
18 return(new_fasta_name) 17 return(new_fasta_name)
419 418
420 419
421 ################### 420 ###################
422 ### RUN RUN RUN ### 421 ### RUN RUN RUN ###
423 ################### 422 ###################
424 import sys,os,zipfile,shutil,subprocess,string 423
425 424
426 ##Create specific folders 425 ##Create specific folders
427 Path_IN_loci_NUC = "./IN_AA" 426 Path_IN_loci_NUC = "./IN_AA"
428 outpath= "./OUT" 427 outpath= "./OUT"
429 os.makedirs(Path_IN_loci_NUC) 428 os.makedirs(Path_IN_loci_NUC)
430 os.makedirs(outpath) 429 os.makedirs(outpath)
431 430
432 431 infiles = str.split(sys.argv[1], ",")
433 #Check if the file is a zip or fasta file 432 for file in infiles:
434 433 os.system("cp %s %s" %(file, Path_IN_loci_NUC))
435 the_zip_file = zipfile.ZipFile(sys.argv[1])
436 ret = the_zip_file.testzip()
437
438 if ret is not None:
439 shutil.copy2(sys.argv[1], './IN_AA/input.fasta')
440 else:
441 cmd="unzip %s -d ./IN_AA"%(sys.argv[1])
442 os.system(cmd)
443
444
445 434
446 ## 1 ## List taxa 435 ## 1 ## List taxa
447 LT=[] 436 LT=[]
448 cmd="grep '>' %s" % sys.argv[2] 437 cmd="grep '>' %s" % sys.argv[2]
449 result = subprocess.check_output(cmd, shell=True) 438 result = subprocess.check_output(cmd, shell=True)
454 LT.append(sp) 443 LT.append(sp)
455 print LT 444 print LT
456 445
457 446
458 ## 2 ## PathIN 447 ## 2 ## PathIN
459 fileIN_properties = open("%s/01_AminoAcid_Properties2.csv"%(script_path), "r") 448 fileIN_properties = open("amino_acid_properties.csv", "r")
460 Path_IN_loci_AA = "./IN_AA" 449 Path_IN_loci_AA = "./IN_AA"
461 #Path_IN_loci_AA = "02_CDS_No_Missing_Data_aa_CDS_withM" 450 #Path_IN_loci_AA = "02_CDS_No_Missing_Data_aa_CDS_withM"
462 Lloci_AA = os.listdir(Path_IN_loci_AA) 451 Lloci_AA = os.listdir(Path_IN_loci_AA)
463 452
464 ## 3 ## PathOUT 453 ## 3 ## PathOUT
465 454
466 ## 3.1 ## PROT composition 455 ## 3.1 ## PROT composition
467 fileOUT_PROT_ALL=open("./OUT/13_prot_compositions_All_AA.csv","w") 456 fileOUT_PROT_ALL=open("./OUT/prot_compositions_All_AA.csv","w")
468 fileOUT_PROT_ALL.write("LOCUS,") 457 fileOUT_PROT_ALL.write("LOCUS,")
469 for taxa in LT: 458 for taxa in LT:
470 fileOUT_PROT_ALL.write("%s_prop_K,%s_prop_R,%s_prop_A,%s_prop_F,%s_prop_I,%s_prop_L,%s_prop_M,%s_prop_V,%s_prop_W,%s_prop_N,%s_prop_Q,%s_prop_S,%s_prop_T,%s_prop_H,%s_prop_Y,%s_prop_C,%s_prop_D,%s_prop_E,%s_prop_P,%s_prop_G," %(taxa,taxa,taxa,taxa,taxa,taxa,taxa,taxa,taxa,taxa,taxa,taxa,taxa,taxa,taxa,taxa,taxa,taxa,taxa,taxa)) 459 fileOUT_PROT_ALL.write("%s_prop_K,%s_prop_R,%s_prop_A,%s_prop_F,%s_prop_I,%s_prop_L,%s_prop_M,%s_prop_V,%s_prop_W,%s_prop_N,%s_prop_Q,%s_prop_S,%s_prop_T,%s_prop_H,%s_prop_Y,%s_prop_C,%s_prop_D,%s_prop_E,%s_prop_P,%s_prop_G," %(taxa,taxa,taxa,taxa,taxa,taxa,taxa,taxa,taxa,taxa,taxa,taxa,taxa,taxa,taxa,taxa,taxa,taxa,taxa,taxa))
471 fileOUT_PROT_ALL.write("\n") 460 fileOUT_PROT_ALL.write("\n")
472 461
473 ## 3.2 ## PROT IVYWREL 462 ## 3.2 ## PROT IVYWREL
474 fileOUT_IVYWREL=open("./OUT/14_IVYWREL.csv","w") 463 fileOUT_IVYWREL=open("./OUT/IVYWREL.csv","w")
475 fileOUT_IVYWREL.write("LOCUS,") 464 fileOUT_IVYWREL.write("LOCUS,")
476 for taxa in LT: 465 for taxa in LT:
477 fileOUT_IVYWREL.write("%s_count_IVYWREL,%s_prop_IVYWREL," %(taxa,taxa)) 466 fileOUT_IVYWREL.write("%s_count_IVYWREL,%s_prop_IVYWREL," %(taxa,taxa))
478 fileOUT_IVYWREL.write("\n") 467 fileOUT_IVYWREL.write("\n")
479 468
480 469
481 ## 3.3 ## PROT ERK_DNQTSHA 470 ## 3.3 ## PROT ERK_DNQTSHA
482 fileOUT_ERK_DNQTSH=open("./OUT/15_ERK_DNQTSH.csv","w") 471 fileOUT_ERK_DNQTSH=open("./OUT/ERK_DNQTSH.csv","w")
483 fileOUT_ERK_DNQTSH.write("LOCUS,") 472 fileOUT_ERK_DNQTSH.write("LOCUS,")
484 for taxa in LT: 473 for taxa in LT:
485 fileOUT_ERK_DNQTSH.write("%s_count_ERK,%s_prop_ERK,%s_count_DNQTSH,%s_prop_DNQTSH,%s_ratio_ERK_vs_DNQTSH," %(taxa,taxa,taxa,taxa,taxa)) 474 fileOUT_ERK_DNQTSH.write("%s_count_ERK,%s_prop_ERK,%s_count_DNQTSH,%s_prop_DNQTSH,%s_ratio_ERK_vs_DNQTSH," %(taxa,taxa,taxa,taxa,taxa))
486 fileOUT_ERK_DNQTSH.write("\n") 475 fileOUT_ERK_DNQTSH.write("\n")
487 476
488 ## 3.4 ## PROT EK_QH 477 ## 3.4 ## PROT EK_QH
489 fileOUT_EK_QH=open("./OUT/16_EK_QH.csv","w") 478 fileOUT_EK_QH=open("./OUT/EK_QH.csv","w")
490 fileOUT_EK_QH.write("LOCUS,") 479 fileOUT_EK_QH.write("LOCUS,")
491 for taxa in LT: 480 for taxa in LT:
492 fileOUT_EK_QH.write("%s_count_EK,%s_prop_EK,%s_count_QH,%s_prop_QH,%s_ratio_EK_vs_QH," %(taxa,taxa,taxa,taxa,taxa)) 481 fileOUT_EK_QH.write("%s_count_EK,%s_prop_EK,%s_count_QH,%s_prop_QH,%s_ratio_EK_vs_QH," %(taxa,taxa,taxa,taxa,taxa))
493 fileOUT_EK_QH.write("\n") 482 fileOUT_EK_QH.write("\n")
494 483
495 484
496 ## 3.5 ## PROT FYMINK_GARP 485 ## 3.5 ## PROT FYMINK_GARP
497 fileOUT_FYMINK_GARP=open("./OUT/17_FYMINK_GARP.csv","w") 486 fileOUT_FYMINK_GARP=open("./OUT/FYMINK_GARP.csv","w")
498 fileOUT_FYMINK_GARP.write("LOCUS,") 487 fileOUT_FYMINK_GARP.write("LOCUS,")
499 for taxa in LT: 488 for taxa in LT:
500 fileOUT_FYMINK_GARP.write("%s_count_FYMINK,%s_prop_FYMINK,%s_count_GARP,%s_prop_GARP," %(taxa,taxa,taxa,taxa)) 489 fileOUT_FYMINK_GARP.write("%s_count_FYMINK,%s_prop_FYMINK,%s_count_GARP,%s_prop_GARP," %(taxa,taxa,taxa,taxa))
501 fileOUT_FYMINK_GARP.write("\n") 490 fileOUT_FYMINK_GARP.write("\n")
502 491
503 492
504 ## 3.6 ## PROT AVLIMFYW 493 ## 3.6 ## PROT AVLIMFYW
505 fileOUT_AVLIMFYW=open("./OUT/18_AVLIMFYW.csv","w") 494 fileOUT_AVLIMFYW=open("./OUT/AVLIMFYW.csv","w")
506 fileOUT_AVLIMFYW.write("LOCUS,") 495 fileOUT_AVLIMFYW.write("LOCUS,")
507 for taxa in LT: 496 for taxa in LT:
508 fileOUT_AVLIMFYW.write("%s_count_AVLIMFYW,%s_prop_AVLIMFYW,%s_count_AVLIM,%s_prop_AVLIM,%s_count_FYW,%s_prop_FYW," %(taxa,taxa,taxa,taxa,taxa,taxa)) 497 fileOUT_AVLIMFYW.write("%s_count_AVLIMFYW,%s_prop_AVLIMFYW,%s_count_AVLIM,%s_prop_AVLIM,%s_count_FYW,%s_prop_FYW," %(taxa,taxa,taxa,taxa,taxa,taxa))
509 fileOUT_AVLIMFYW.write("\n") 498 fileOUT_AVLIMFYW.write("\n")
510 499
511 ## 3.7 ## PROT STNQ 500 ## 3.7 ## PROT STNQ
512 fileOUT_STNQ=open("./OUT/19_STNQ.csv","w") 501 fileOUT_STNQ=open("./OUT/STNQ.csv","w")
513 fileOUT_STNQ.write("LOCUS,") 502 fileOUT_STNQ.write("LOCUS,")
514 for taxa in LT: 503 for taxa in LT:
515 fileOUT_STNQ.write("%s_count_STNQ,%s_prop_STNQ," %(taxa,taxa)) 504 fileOUT_STNQ.write("%s_count_STNQ,%s_prop_STNQ," %(taxa,taxa))
516 fileOUT_STNQ.write("\n") 505 fileOUT_STNQ.write("\n")
517 506
518 ## 3.8 ## PROT RHKDE 507 ## 3.8 ## PROT RHKDE
519 fileOUT_RHKDE=open("./OUT/20_RHKDE.csv","w") 508 fileOUT_RHKDE=open("./OUT/RHKDE.csv","w")
520 fileOUT_RHKDE.write("LOCUS,") 509 fileOUT_RHKDE.write("LOCUS,")
521 for taxa in LT: 510 for taxa in LT:
522 fileOUT_RHKDE.write("%s_count_RHKDE,%s_prop_RHKDE,%s_count_RHK,%s_prop_RHK,%s_count_DE,%s_prop_DE," %(taxa,taxa,taxa,taxa,taxa,taxa)) 511 fileOUT_RHKDE.write("%s_count_RHKDE,%s_prop_RHKDE,%s_count_RHK,%s_prop_RHK,%s_count_DE,%s_prop_DE," %(taxa,taxa,taxa,taxa,taxa,taxa))
523 fileOUT_RHKDE.write("\n") 512 fileOUT_RHKDE.write("\n")
524 513
525 ## 3.9 ## PROT DIDER CRITERIA 514 ## 3.9 ## PROT DIDER CRITERIA
526 fileOUT_PAYRE=open("./OUT/21_PAYRE-MVGDS.csv","w") 515 fileOUT_PAYRE=open("./OUT/PAYRE-MVGDS.csv","w")
527 fileOUT_PAYRE.write("LOCUS,") 516 fileOUT_PAYRE.write("LOCUS,")
528 for taxa in LT: 517 for taxa in LT:
529 fileOUT_PAYRE.write("%s_count_PAYRE,%s_prop_PAYRE,%s_count_AC,%s_prop_AC,%s_count_MVGDS,%s_prop_MVGDS,%s_ratio_PAYRE_vs_MVGDS,%s_ratio_AC_vs_MVGDS," %(taxa,taxa,taxa,taxa,taxa,taxa,taxa,taxa)) 518 fileOUT_PAYRE.write("%s_count_PAYRE,%s_prop_PAYRE,%s_count_AC,%s_prop_AC,%s_count_MVGDS,%s_prop_MVGDS,%s_ratio_PAYRE_vs_MVGDS,%s_ratio_AC_vs_MVGDS," %(taxa,taxa,taxa,taxa,taxa,taxa,taxa,taxa))
530 fileOUT_PAYRE.write("\n") 519 fileOUT_PAYRE.write("\n")
531 520
532 ## 3.10 ## PROT Total residue weight 521 ## 3.10 ## PROT Total residue weight
533 fileOUT_TotalResidueWeight=open("./OUT/22_TotalResidueWeight.csv","w") 522 fileOUT_TotalResidueWeight=open("./OUT/TotalResidueWeight.csv","w")
534 fileOUT_TotalResidueWeight.write("LOCUS,") 523 fileOUT_TotalResidueWeight.write("LOCUS,")
535 for taxa in LT: 524 for taxa in LT:
536 fileOUT_TotalResidueWeight.write("%s_Total_Residue_Weight," %taxa) 525 fileOUT_TotalResidueWeight.write("%s_Total_Residue_Weight," %taxa)
537 fileOUT_TotalResidueWeight.write("\n") 526 fileOUT_TotalResidueWeight.write("\n")
538 527
539 ## 3.11 ## PROT Total residue volume 528 ## 3.11 ## PROT Total residue volume
540 fileOUT_TotalResidueVolume=open("./OUT/23_TotalResidueVolume.csv","w") 529 fileOUT_TotalResidueVolume=open("./OUT/TotalResidueVolume.csv","w")
541 fileOUT_TotalResidueVolume.write("LOCUS,") 530 fileOUT_TotalResidueVolume.write("LOCUS,")
542 for taxa in LT: 531 for taxa in LT:
543 fileOUT_TotalResidueVolume.write("%s_Total_Residue_Volume," %taxa) 532 fileOUT_TotalResidueVolume.write("%s_Total_Residue_Volume," %taxa)
544 fileOUT_TotalResidueVolume.write("\n") 533 fileOUT_TotalResidueVolume.write("\n")
545 534
546 ## 3.12 ## PROT Total partial specific volume 535 ## 3.12 ## PROT Total partial specific volume
547 fileOUT_TotalPartialSpecificVolume=open("./OUT/24_TotalPartialSpecificVolume.csv","w") 536 fileOUT_TotalPartialSpecificVolume=open("./OUT/TotalPartialSpecificVolume.csv","w")
548 fileOUT_TotalPartialSpecificVolume.write("LOCUS,") 537 fileOUT_TotalPartialSpecificVolume.write("LOCUS,")
549 for taxa in LT: 538 for taxa in LT:
550 fileOUT_TotalPartialSpecificVolume.write("%s_Total_Partial_Specific_Volume," %taxa) 539 fileOUT_TotalPartialSpecificVolume.write("%s_Total_Partial_Specific_Volume," %taxa)
551 fileOUT_TotalPartialSpecificVolume.write("\n") 540 fileOUT_TotalPartialSpecificVolume.write("\n")
552 541
553 ## 3.13 ## PROT Total hydratation 542 ## 3.13 ## PROT Total hydratation
554 fileOUT_TotalHydratation=open("./OUT/25_TotalHydratation.csv","w") 543 fileOUT_TotalHydratation=open("./OUT/TotalHydratation.csv","w")
555 fileOUT_TotalHydratation.write("LOCUS,") 544 fileOUT_TotalHydratation.write("LOCUS,")
556 for taxa in LT: 545 for taxa in LT:
557 fileOUT_TotalHydratation.write("%s_Total_Hydratation," %taxa) 546 fileOUT_TotalHydratation.write("%s_Total_Hydratation," %taxa)
558 fileOUT_TotalHydratation.write("\n") 547 fileOUT_TotalHydratation.write("\n")
559 548
621 fileOUT_TotalHydratation.write("\n") 610 fileOUT_TotalHydratation.write("\n")
622 611
623 612
624 613
625 614
615