Mercurial > repos > abims-sbr > cds_search
changeset 7:35e39b4128ba draft
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit b7a3030ea134b5dfad89b1a869db659d72d1145c
author | abims-sbr |
---|---|
date | Wed, 28 Feb 2018 10:38:40 -0500 |
parents | c03ba6a7c5f2 |
children | 716a45028e55 |
files | scripts/S01_find_orf_on_multiple_alignment.py scripts/S02_remove_too_short_bit_or_whole_sequence.py scripts/S03_remove_site_with_not_enough_species_represented.py scripts/dico.py |
diffstat | 4 files changed, 31 insertions(+), 50 deletions(-) [+] |
line wrap: on
line diff
--- a/scripts/S01_find_orf_on_multiple_alignment.py Wed Feb 28 06:05:15 2018 -0500 +++ b/scripts/S01_find_orf_on_multiple_alignment.py Wed Feb 28 10:38:40 2018 -0500 @@ -17,20 +17,18 @@ def code_universel(F1): bash_codeUniversel = {} - while 1: - next = F1.readline() - if not next: break - L1 = string.split(next, " ") - length1 = len(L1) - if length1 == 3: - key = L1[0] - value = L1[2][:-1] - bash_codeUniversel[key] = value - else: - key = L1[0] - value = L1[2] - bash_codeUniversel[key] = value - F1.close() + with open(F1, "r") as file: + for line in file.readlines(): + L1 = string.split(line, " ") + length1 = len(L1) + if length1 == 3: + key = L1[0] + value = L1[2][:-1] + bash_codeUniversel[key] = value + else: + key = L1[0] + value = L1[2] + bash_codeUniversel[key] = value return(bash_codeUniversel) ########################################################### @@ -329,9 +327,7 @@ list_file = str.split(infiles,",") ### Get Universal Code -F2 = open(sys.argv[2], 'r') -bash_codeUniversel = code_universel(F2) ### DEF2 ### -F2.close() +bash_codeUniversel = code_universel(sys.argv[2]) ### DEF2 ### os.mkdir("04_BEST_ORF_nuc") Path_OUT1 = "04_BEST_ORF_nuc" @@ -359,10 +355,8 @@ for file in list_file: count_file_processed = count_file_processed + 1 - fasta_file_path = "./%s" %file - fasta_file = open(fasta_file_path, "r") - bash_fasta = dico(fasta_file) ### DEF 1 ### - fasta_file.close() + fasta_file_path = "./%s" %file + bash_fasta = dico(fasta_file_path) ### DEF 1 ### BESTORF_nuc, BESTORF_nuc_CODING, BESTORF_nuc_CDS_with_M, BESTORF_aa, BESTORF_aa_CODING, BESTORF_aa_CDS_with_M = find_good_ORF_criteria_3(bash_fasta, bash_codeUniversel) ### DEF 4 - PART 2 - ### ## a ## OUTPUT BESTORF_nuc
--- a/scripts/S02_remove_too_short_bit_or_whole_sequence.py Wed Feb 28 06:05:15 2018 -0500 +++ b/scripts/S02_remove_too_short_bit_or_whole_sequence.py Wed Feb 28 10:38:40 2018 -0500 @@ -78,8 +78,8 @@ path_OUT2 = "07_CDS_nuc" for file in L_IN1: - file_INaa = open("%s/%s" %(path_IN1, file), "r") - file_INnuc = open("%s/%s" %(path_IN2, file), "r") + file_INaa = "%s/%s" %(path_IN1, file) + file_INnuc = "%s/%s" %(path_IN2, file) dico_aa = dico(file_INaa) ### DEF 0 ### dico_nuc = dico(file_INnuc) ### DEF 0 ### @@ -174,9 +174,6 @@ if nb not in list_sp and nb != "sp0" : list_sp.append(nb) - file_INaa.close() - file_INnuc.close() - # [FILTER 5]: check if the number of locus with the max number of species isn't 0 #if it is : MIN_SPECIES_NB - 1 if len(list_sp) < MIN_SPECIES_NB :
--- a/scripts/S03_remove_site_with_not_enough_species_represented.py Wed Feb 28 06:05:15 2018 -0500 +++ b/scripts/S03_remove_site_with_not_enough_species_represented.py Wed Feb 28 10:38:40 2018 -0500 @@ -123,14 +123,11 @@ for file in L_IN1: - file_INaa = open("%s/%s" %(path_IN1, file), "r") - file_INnuc = open("%s/%s" %(path_IN2, file), "r") + file_INaa = "%s/%s" %(path_IN1, file) + file_INnuc = "%s/%s" %(path_IN2, file) dico_aa = dico(file_INaa) ### DEF 1 ### dico_nuc = dico(file_INnuc) ### DEF 1 ### - - file_INaa.close() - file_INnuc.close() if len(dico_aa) < MIN_SPECIES_NB : list_file.append(file) @@ -140,8 +137,8 @@ for file in L_IN1 : - file_INaa = open("%s/%s" %(path_IN1, file), "r") - file_INnuc = open("%s/%s" %(path_IN2, file), "r") + file_INaa = "%s/%s" %(path_IN1, file) + file_INnuc = "%s/%s" %(path_IN2, file) dico_aa = dico(file_INaa) ### DEF 1 ### dico_nuc = dico(file_INnuc) ### DEF 1 ### @@ -156,10 +153,6 @@ seq0 = filtered_bash_nuc[k0] new_leng_nuc = len(seq0) - ## 4.2 ## Close INPUT - file_INaa.close() - file_INnuc.close() - ## 4.3 ## Change file name for output, depending the number of species remaining in the alignment LS = string.split(file, "_NEW") LS = "".join(LS)
--- a/scripts/dico.py Wed Feb 28 06:05:15 2018 -0500 +++ b/scripts/dico.py Wed Feb 28 10:38:40 2018 -0500 @@ -1,16 +1,13 @@ -import string +import string, itertools -def dico(F1): +def dico(F1): dicoco = {} - while 1: - next2 = F1.readline() - if not next2: - break - if next2[0] == ">": - fasta_name_query = next2[:-1] - Sn = string.split(fasta_name_query, "||") - fasta_name_query = Sn[0] - next3 = F1.readline() - fasta_seq_query = next3[:-1] - dicoco[fasta_name_query]=fasta_seq_query + with open(F1, "r") as file: + for name, query in itertools.izip_longest(*[file]*2): + if name[0] == ">": + fasta_name_query = name[:-1] + Sn = string.split(fasta_name_query, "||") + fasta_name_query = Sn[0] + fasta_seq_query = query[:-1] + dicoco[fasta_name_query] = fasta_seq_query return(dicoco)