Mercurial > repos > abims-sbr > mutcount
diff scripts/S02b_study_seq_composition_nuc.py @ 2:988467f963f0 draft
planemo upload for repository https://github.com/abims-sbr/adaptearch commit cf1b9c905931ca2ca25faa4844d45c908756472f
| author | abims-sbr |
|---|---|
| date | Wed, 17 Jan 2018 08:57:49 -0500 |
| parents | 8de21b6eb110 |
| children | 5766f80370e7 |
line wrap: on
line diff
--- a/scripts/S02b_study_seq_composition_nuc.py Wed Sep 27 10:04:08 2017 -0400 +++ b/scripts/S02b_study_seq_composition_nuc.py Wed Jan 17 08:57:49 2018 -0500 @@ -1,63 +1,11 @@ #!/usr/bin/env python ## Author: Eric FONTANILLAS ## Date: 21.12.10 +## Last Version : 12/2017 by Victor Mataigne ## Object: Test for compositional bias in genome and proteome as marker of thermal adaptation (comparison between 2 "hot" species: Ap and Ps and one "cold" species: Pg) - import sys,os,shutil,subprocess, string - -############# -### DEF 0 ### - -def simplify_fasta_name(fasta_name,LT): - for abbreviation in LT: - if abbreviation in fasta_name: - new_fasta_name = abbreviation - - return(new_fasta_name) -########################################## - -########### -## DEF1 ## -########### -## Generates bash, with key = fasta name; value = sequence (WITH GAP, IF ANY, REMOVED IN THIS FUNCTION) - -def dico(fasta_file,LT): - - count_fastaName=0 - F1 = open(fasta_file, "r") - - bash1 = {} - while 1: - nextline = F1.readline() - #print nextline - if not nextline : - break - - if nextline[0] == ">": - count_fastaName = count_fastaName + 1 - fasta_name = nextline[1:-1] - nextline = F1.readline() - sequence = nextline[:-1] - - if fasta_name not in bash1.keys(): - fasta_name = simplify_fasta_name(fasta_name,LT) ### DEF 0 ### - bash1[fasta_name] = sequence - else: - print fasta_name - - # Find alignment length - kk = bash1.keys() - key0 = kk[0] - seq0 = bash1[key0] - ln_seq = len(seq0) - - F1.close() - - return(bash1) -##################################### - - +from functions import simplify_fasta_name, dico ################## ###### DEF2 ###### @@ -205,21 +153,28 @@ for locus in Lloci_NUC: print locus path_locus = "%s/%s" %(Path_IN_loci_NUC, locus) - bash = dico(path_locus,LT) + bash = dico(path_locus,LT) fileOUT_NUC.write("%s," %locus) fileOUT_percent_GC.write("%s," %locus) fileOUT_percent_purine.write("%s," %locus) fileOUT_Purine_Load.write("%s," %locus) - - if taxa in bash.keys(): - seq = 
bash[taxa] - percent_GC, percent_purine,prop_A, prop_T, prop_C, prop_G = base_composition(seq) ### DEF2 ### - TOTAL, DIFF_GC, DIFF_AT,PLI_GC,PLI_AT,PLI_GC_1000,PLI_AT_1000 = purine_loading(seq) ### DEF3 ### - fileOUT_NUC.write("%.5f,%.5f,%.5f,%.5f," %(prop_A,prop_T,prop_C,prop_G)) - fileOUT_percent_GC.write("%.5f," %percent_GC) - fileOUT_percent_purine.write("%.5f," %percent_purine) - fileOUT_Purine_Load.write("%d,%d,%d,%.5f,%.5f," %(TOTAL, DIFF_GC, DIFF_AT,PLI_GC_1000, PLI_AT_1000)) + + for taxa in LT: + if taxa in bash.keys(): + seq = bash[taxa] + percent_GC, percent_purine,prop_A, prop_T, prop_C, prop_G = base_composition(seq) ### DEF2 ### + TOTAL, DIFF_GC, DIFF_AT,PLI_GC,PLI_AT,PLI_GC_1000,PLI_AT_1000 = purine_loading(seq) ### DEF3 ### + fileOUT_NUC.write("%.5f,%.5f,%.5f,%.5f," %(prop_A,prop_T,prop_C,prop_G)) + fileOUT_percent_GC.write("%.5f," %percent_GC) + fileOUT_percent_purine.write("%.5f," %percent_purine) + fileOUT_Purine_Load.write("%d,%d,%d,%.5f,%.5f," %(TOTAL, DIFF_GC, DIFF_AT,PLI_GC_1000, PLI_AT_1000)) + else: + fileOUT_NUC.write("%s,%s,%s,%s," %("n.a","n.a","n.a","n.a")) + fileOUT_percent_GC.write("%s," %"n.a") + fileOUT_percent_purine.write("%s," %"n.a") + fileOUT_Purine_Load.write("%s,%s,%s,%s,%s," %("n.a","n.a","n.a","n.a","n.a")) + fileOUT_NUC.write("\n") fileOUT_percent_GC.write("\n") fileOUT_percent_purine.write("\n") @@ -228,4 +183,3 @@ fileOUT_percent_GC.close() fileOUT_percent_purine.close() fileOUT_Purine_Load.close() -
