Mercurial > repos > abims-sbr > mutcount
comparison scripts/S01b_study_seq_composition_aa.py @ 1:8de21b6eb110 draft
planemo upload for repository https://github.com/abims-sbr/adaptearch commit 44a89d5eeb82789bfc643b33c11f391281b6374b
| author | abims-sbr |
|---|---|
| date | Wed, 27 Sep 2017 10:04:08 -0400 |
| parents | 78dd6454f6f0 |
| children | 988467f963f0 |
comparison
equal
deleted
inserted
replaced
| 0:78dd6454f6f0 | 1:8de21b6eb110 |
|---|---|
| 1 #!/usr/bin/env python | 1 #!/usr/bin/env python |
| 2 # -*- coding: ascii -*- | 2 # -*- coding: ascii -*- |
| 3 ## Author: Eric FONTANILLAS | 3 ## Author: Eric FONTANILLAS |
| 4 ## Date: 21.12.10 | 4 ## Date: 21.12.10 |
| 5 ## Object: Test for compositional bias in genome and proteome as marker of thermal adaptation (comparison between 2 "hot" species: Ap and Ps and two "cold" species: Pg, Pp) | 5 ## Object: Test for compositional bias in genome and proteome as marker of thermal adaptation (comparison between 2 "hot" species: Ap and Ps and two "cold" species: Pg, Pp) |
| 6 import sys, os | 6 import sys,os,shutil,subprocess,string |
| 7 script_path = os.path.dirname(sys.argv[0]) | 7 script_path = os.path.dirname(sys.argv[0]) |
| 8 | 8 |
| 9 ############# | 9 ############# |
| 10 ### DEF 0 ### | 10 ### DEF 0 ### |
| 11 ############# | 11 ############# |
| 12 def simplify_fasta_name(fasta_name,LT): | 12 def simplify_fasta_name(fasta_name,LT): |
| 13 | |
| 14 for abbreviation in LT: | 13 for abbreviation in LT: |
| 15 if abbreviation in fasta_name: | 14 if abbreviation in fasta_name: |
| 16 new_fasta_name = abbreviation | 15 new_fasta_name = abbreviation |
| 17 | 16 |
| 18 return(new_fasta_name) | 17 return(new_fasta_name) |
| 419 | 418 |
| 420 | 419 |
| 421 ################### | 420 ################### |
| 422 ### RUN RUN RUN ### | 421 ### RUN RUN RUN ### |
| 423 ################### | 422 ################### |
| 424 import sys,os,zipfile,shutil,subprocess,string | 423 |
| 425 | 424 |
| 426 ##Create specific folders | 425 ##Create specific folders |
| 427 Path_IN_loci_NUC = "./IN_AA" | 426 Path_IN_loci_NUC = "./IN_AA" |
| 428 outpath= "./OUT" | 427 outpath= "./OUT" |
| 429 os.makedirs(Path_IN_loci_NUC) | 428 os.makedirs(Path_IN_loci_NUC) |
| 430 os.makedirs(outpath) | 429 os.makedirs(outpath) |
| 431 | 430 |
| 432 | 431 infiles = str.split(sys.argv[1], ",") |
| 433 #Check if the file is a zip or fasta file | 432 for file in infiles: |
| 434 | 433 os.system("cp %s %s" %(file, Path_IN_loci_NUC)) |
| 435 the_zip_file = zipfile.ZipFile(sys.argv[1]) | |
| 436 ret = the_zip_file.testzip() | |
| 437 | |
| 438 if ret is not None: | |
| 439 shutil.copy2(sys.argv[1], './IN_AA/input.fasta') | |
| 440 else: | |
| 441 cmd="unzip %s -d ./IN_AA"%(sys.argv[1]) | |
| 442 os.system(cmd) | |
| 443 | |
| 444 | |
| 445 | 434 |
| 446 ## 1 ## List taxa | 435 ## 1 ## List taxa |
| 447 LT=[] | 436 LT=[] |
| 448 cmd="grep '>' %s" % sys.argv[2] | 437 cmd="grep '>' %s" % sys.argv[2] |
| 449 result = subprocess.check_output(cmd, shell=True) | 438 result = subprocess.check_output(cmd, shell=True) |
| 454 LT.append(sp) | 443 LT.append(sp) |
| 455 print LT | 444 print LT |
| 456 | 445 |
| 457 | 446 |
| 458 ## 2 ## PathIN | 447 ## 2 ## PathIN |
| 459 fileIN_properties = open("%s/01_AminoAcid_Properties2.csv"%(script_path), "r") | 448 fileIN_properties = open("amino_acid_properties.csv", "r") |
| 460 Path_IN_loci_AA = "./IN_AA" | 449 Path_IN_loci_AA = "./IN_AA" |
| 461 #Path_IN_loci_AA = "02_CDS_No_Missing_Data_aa_CDS_withM" | 450 #Path_IN_loci_AA = "02_CDS_No_Missing_Data_aa_CDS_withM" |
| 462 Lloci_AA = os.listdir(Path_IN_loci_AA) | 451 Lloci_AA = os.listdir(Path_IN_loci_AA) |
| 463 | 452 |
| 464 ## 3 ## PathOUT | 453 ## 3 ## PathOUT |
| 465 | 454 |
| 466 ## 3.1 ## PROT composition | 455 ## 3.1 ## PROT composition |
| 467 fileOUT_PROT_ALL=open("./OUT/13_prot_compositions_All_AA.csv","w") | 456 fileOUT_PROT_ALL=open("./OUT/prot_compositions_All_AA.csv","w") |
| 468 fileOUT_PROT_ALL.write("LOCUS,") | 457 fileOUT_PROT_ALL.write("LOCUS,") |
| 469 for taxa in LT: | 458 for taxa in LT: |
| 470 fileOUT_PROT_ALL.write("%s_prop_K,%s_prop_R,%s_prop_A,%s_prop_F,%s_prop_I,%s_prop_L,%s_prop_M,%s_prop_V,%s_prop_W,%s_prop_N,%s_prop_Q,%s_prop_S,%s_prop_T,%s_prop_H,%s_prop_Y,%s_prop_C,%s_prop_D,%s_prop_E,%s_prop_P,%s_prop_G," %(taxa,taxa,taxa,taxa,taxa,taxa,taxa,taxa,taxa,taxa,taxa,taxa,taxa,taxa,taxa,taxa,taxa,taxa,taxa,taxa)) | 459 fileOUT_PROT_ALL.write("%s_prop_K,%s_prop_R,%s_prop_A,%s_prop_F,%s_prop_I,%s_prop_L,%s_prop_M,%s_prop_V,%s_prop_W,%s_prop_N,%s_prop_Q,%s_prop_S,%s_prop_T,%s_prop_H,%s_prop_Y,%s_prop_C,%s_prop_D,%s_prop_E,%s_prop_P,%s_prop_G," %(taxa,taxa,taxa,taxa,taxa,taxa,taxa,taxa,taxa,taxa,taxa,taxa,taxa,taxa,taxa,taxa,taxa,taxa,taxa,taxa)) |
| 471 fileOUT_PROT_ALL.write("\n") | 460 fileOUT_PROT_ALL.write("\n") |
| 472 | 461 |
| 473 ## 3.2 ## PROT IVYWREL | 462 ## 3.2 ## PROT IVYWREL |
| 474 fileOUT_IVYWREL=open("./OUT/14_IVYWREL.csv","w") | 463 fileOUT_IVYWREL=open("./OUT/IVYWREL.csv","w") |
| 475 fileOUT_IVYWREL.write("LOCUS,") | 464 fileOUT_IVYWREL.write("LOCUS,") |
| 476 for taxa in LT: | 465 for taxa in LT: |
| 477 fileOUT_IVYWREL.write("%s_count_IVYWREL,%s_prop_IVYWREL," %(taxa,taxa)) | 466 fileOUT_IVYWREL.write("%s_count_IVYWREL,%s_prop_IVYWREL," %(taxa,taxa)) |
| 478 fileOUT_IVYWREL.write("\n") | 467 fileOUT_IVYWREL.write("\n") |
| 479 | 468 |
| 480 | 469 |
| 481 ## 3.3 ## PROT ERK_DNQTSHA | 470 ## 3.3 ## PROT ERK_DNQTSHA |
| 482 fileOUT_ERK_DNQTSH=open("./OUT/15_ERK_DNQTSH.csv","w") | 471 fileOUT_ERK_DNQTSH=open("./OUT/ERK_DNQTSH.csv","w") |
| 483 fileOUT_ERK_DNQTSH.write("LOCUS,") | 472 fileOUT_ERK_DNQTSH.write("LOCUS,") |
| 484 for taxa in LT: | 473 for taxa in LT: |
| 485 fileOUT_ERK_DNQTSH.write("%s_count_ERK,%s_prop_ERK,%s_count_DNQTSH,%s_prop_DNQTSH,%s_ratio_ERK_vs_DNQTSH," %(taxa,taxa,taxa,taxa,taxa)) | 474 fileOUT_ERK_DNQTSH.write("%s_count_ERK,%s_prop_ERK,%s_count_DNQTSH,%s_prop_DNQTSH,%s_ratio_ERK_vs_DNQTSH," %(taxa,taxa,taxa,taxa,taxa)) |
| 486 fileOUT_ERK_DNQTSH.write("\n") | 475 fileOUT_ERK_DNQTSH.write("\n") |
| 487 | 476 |
| 488 ## 3.4 ## PROT EK_QH | 477 ## 3.4 ## PROT EK_QH |
| 489 fileOUT_EK_QH=open("./OUT/16_EK_QH.csv","w") | 478 fileOUT_EK_QH=open("./OUT/EK_QH.csv","w") |
| 490 fileOUT_EK_QH.write("LOCUS,") | 479 fileOUT_EK_QH.write("LOCUS,") |
| 491 for taxa in LT: | 480 for taxa in LT: |
| 492 fileOUT_EK_QH.write("%s_count_EK,%s_prop_EK,%s_count_QH,%s_prop_QH,%s_ratio_EK_vs_QH," %(taxa,taxa,taxa,taxa,taxa)) | 481 fileOUT_EK_QH.write("%s_count_EK,%s_prop_EK,%s_count_QH,%s_prop_QH,%s_ratio_EK_vs_QH," %(taxa,taxa,taxa,taxa,taxa)) |
| 493 fileOUT_EK_QH.write("\n") | 482 fileOUT_EK_QH.write("\n") |
| 494 | 483 |
| 495 | 484 |
| 496 ## 3.5 ## PROT FYMINK_GARP | 485 ## 3.5 ## PROT FYMINK_GARP |
| 497 fileOUT_FYMINK_GARP=open("./OUT/17_FYMINK_GARP.csv","w") | 486 fileOUT_FYMINK_GARP=open("./OUT/FYMINK_GARP.csv","w") |
| 498 fileOUT_FYMINK_GARP.write("LOCUS,") | 487 fileOUT_FYMINK_GARP.write("LOCUS,") |
| 499 for taxa in LT: | 488 for taxa in LT: |
| 500 fileOUT_FYMINK_GARP.write("%s_count_FYMINK,%s_prop_FYMINK,%s_count_GARP,%s_prop_GARP," %(taxa,taxa,taxa,taxa)) | 489 fileOUT_FYMINK_GARP.write("%s_count_FYMINK,%s_prop_FYMINK,%s_count_GARP,%s_prop_GARP," %(taxa,taxa,taxa,taxa)) |
| 501 fileOUT_FYMINK_GARP.write("\n") | 490 fileOUT_FYMINK_GARP.write("\n") |
| 502 | 491 |
| 503 | 492 |
| 504 ## 3.6 ## PROT AVLIMFYW | 493 ## 3.6 ## PROT AVLIMFYW |
| 505 fileOUT_AVLIMFYW=open("./OUT/18_AVLIMFYW.csv","w") | 494 fileOUT_AVLIMFYW=open("./OUT/AVLIMFYW.csv","w") |
| 506 fileOUT_AVLIMFYW.write("LOCUS,") | 495 fileOUT_AVLIMFYW.write("LOCUS,") |
| 507 for taxa in LT: | 496 for taxa in LT: |
| 508 fileOUT_AVLIMFYW.write("%s_count_AVLIMFYW,%s_prop_AVLIMFYW,%s_count_AVLIM,%s_prop_AVLIM,%s_count_FYW,%s_prop_FYW," %(taxa,taxa,taxa,taxa,taxa,taxa)) | 497 fileOUT_AVLIMFYW.write("%s_count_AVLIMFYW,%s_prop_AVLIMFYW,%s_count_AVLIM,%s_prop_AVLIM,%s_count_FYW,%s_prop_FYW," %(taxa,taxa,taxa,taxa,taxa,taxa)) |
| 509 fileOUT_AVLIMFYW.write("\n") | 498 fileOUT_AVLIMFYW.write("\n") |
| 510 | 499 |
| 511 ## 3.7 ## PROT STNQ | 500 ## 3.7 ## PROT STNQ |
| 512 fileOUT_STNQ=open("./OUT/19_STNQ.csv","w") | 501 fileOUT_STNQ=open("./OUT/STNQ.csv","w") |
| 513 fileOUT_STNQ.write("LOCUS,") | 502 fileOUT_STNQ.write("LOCUS,") |
| 514 for taxa in LT: | 503 for taxa in LT: |
| 515 fileOUT_STNQ.write("%s_count_STNQ,%s_prop_STNQ," %(taxa,taxa)) | 504 fileOUT_STNQ.write("%s_count_STNQ,%s_prop_STNQ," %(taxa,taxa)) |
| 516 fileOUT_STNQ.write("\n") | 505 fileOUT_STNQ.write("\n") |
| 517 | 506 |
| 518 ## 3.8 ## PROT RHKDE | 507 ## 3.8 ## PROT RHKDE |
| 519 fileOUT_RHKDE=open("./OUT/20_RHKDE.csv","w") | 508 fileOUT_RHKDE=open("./OUT/RHKDE.csv","w") |
| 520 fileOUT_RHKDE.write("LOCUS,") | 509 fileOUT_RHKDE.write("LOCUS,") |
| 521 for taxa in LT: | 510 for taxa in LT: |
| 522 fileOUT_RHKDE.write("%s_count_RHKDE,%s_prop_RHKDE,%s_count_RHK,%s_prop_RHK,%s_count_DE,%s_prop_DE," %(taxa,taxa,taxa,taxa,taxa,taxa)) | 511 fileOUT_RHKDE.write("%s_count_RHKDE,%s_prop_RHKDE,%s_count_RHK,%s_prop_RHK,%s_count_DE,%s_prop_DE," %(taxa,taxa,taxa,taxa,taxa,taxa)) |
| 523 fileOUT_RHKDE.write("\n") | 512 fileOUT_RHKDE.write("\n") |
| 524 | 513 |
| 525 ## 3.9 ## PROT DIDER CRITERIA | 514 ## 3.9 ## PROT DIDER CRITERIA |
| 526 fileOUT_PAYRE=open("./OUT/21_PAYRE-MVGDS.csv","w") | 515 fileOUT_PAYRE=open("./OUT/PAYRE-MVGDS.csv","w") |
| 527 fileOUT_PAYRE.write("LOCUS,") | 516 fileOUT_PAYRE.write("LOCUS,") |
| 528 for taxa in LT: | 517 for taxa in LT: |
| 529 fileOUT_PAYRE.write("%s_count_PAYRE,%s_prop_PAYRE,%s_count_AC,%s_prop_AC,%s_count_MVGDS,%s_prop_MVGDS,%s_ratio_PAYRE_vs_MVGDS,%s_ratio_AC_vs_MVGDS," %(taxa,taxa,taxa,taxa,taxa,taxa,taxa,taxa)) | 518 fileOUT_PAYRE.write("%s_count_PAYRE,%s_prop_PAYRE,%s_count_AC,%s_prop_AC,%s_count_MVGDS,%s_prop_MVGDS,%s_ratio_PAYRE_vs_MVGDS,%s_ratio_AC_vs_MVGDS," %(taxa,taxa,taxa,taxa,taxa,taxa,taxa,taxa)) |
| 530 fileOUT_PAYRE.write("\n") | 519 fileOUT_PAYRE.write("\n") |
| 531 | 520 |
| 532 ## 3.10 ## PROT Total residue weight | 521 ## 3.10 ## PROT Total residue weight |
| 533 fileOUT_TotalResidueWeight=open("./OUT/22_TotalResidueWeight.csv","w") | 522 fileOUT_TotalResidueWeight=open("./OUT/TotalResidueWeight.csv","w") |
| 534 fileOUT_TotalResidueWeight.write("LOCUS,") | 523 fileOUT_TotalResidueWeight.write("LOCUS,") |
| 535 for taxa in LT: | 524 for taxa in LT: |
| 536 fileOUT_TotalResidueWeight.write("%s_Total_Residue_Weight," %taxa) | 525 fileOUT_TotalResidueWeight.write("%s_Total_Residue_Weight," %taxa) |
| 537 fileOUT_TotalResidueWeight.write("\n") | 526 fileOUT_TotalResidueWeight.write("\n") |
| 538 | 527 |
| 539 ## 3.11 ## PROT Total residue volume | 528 ## 3.11 ## PROT Total residue volume |
| 540 fileOUT_TotalResidueVolume=open("./OUT/23_TotalResidueVolume.csv","w") | 529 fileOUT_TotalResidueVolume=open("./OUT/TotalResidueVolume.csv","w") |
| 541 fileOUT_TotalResidueVolume.write("LOCUS,") | 530 fileOUT_TotalResidueVolume.write("LOCUS,") |
| 542 for taxa in LT: | 531 for taxa in LT: |
| 543 fileOUT_TotalResidueVolume.write("%s_Total_Residue_Volume," %taxa) | 532 fileOUT_TotalResidueVolume.write("%s_Total_Residue_Volume," %taxa) |
| 544 fileOUT_TotalResidueVolume.write("\n") | 533 fileOUT_TotalResidueVolume.write("\n") |
| 545 | 534 |
| 546 ## 3.12 ## PROT Total partial specific volume | 535 ## 3.12 ## PROT Total partial specific volume |
| 547 fileOUT_TotalPartialSpecificVolume=open("./OUT/24_TotalPartialSpecificVolume.csv","w") | 536 fileOUT_TotalPartialSpecificVolume=open("./OUT/TotalPartialSpecificVolume.csv","w") |
| 548 fileOUT_TotalPartialSpecificVolume.write("LOCUS,") | 537 fileOUT_TotalPartialSpecificVolume.write("LOCUS,") |
| 549 for taxa in LT: | 538 for taxa in LT: |
| 550 fileOUT_TotalPartialSpecificVolume.write("%s_Total_Partial_Specific_Volume," %taxa) | 539 fileOUT_TotalPartialSpecificVolume.write("%s_Total_Partial_Specific_Volume," %taxa) |
| 551 fileOUT_TotalPartialSpecificVolume.write("\n") | 540 fileOUT_TotalPartialSpecificVolume.write("\n") |
| 552 | 541 |
| 553 ## 3.13 ## PROT Total hydratation | 542 ## 3.13 ## PROT Total hydratation |
| 554 fileOUT_TotalHydratation=open("./OUT/25_TotalHydratation.csv","w") | 543 fileOUT_TotalHydratation=open("./OUT/TotalHydratation.csv","w") |
| 555 fileOUT_TotalHydratation.write("LOCUS,") | 544 fileOUT_TotalHydratation.write("LOCUS,") |
| 556 for taxa in LT: | 545 for taxa in LT: |
| 557 fileOUT_TotalHydratation.write("%s_Total_Hydratation," %taxa) | 546 fileOUT_TotalHydratation.write("%s_Total_Hydratation," %taxa) |
| 558 fileOUT_TotalHydratation.write("\n") | 547 fileOUT_TotalHydratation.write("\n") |
| 559 | 548 |
| 621 fileOUT_TotalHydratation.write("\n") | 610 fileOUT_TotalHydratation.write("\n") |
| 622 | 611 |
| 623 | 612 |
| 624 | 613 |
| 625 | 614 |
| 615 |
