# HG changeset patch # User abims-sbr # Date 1519832359 18000 # Node ID 59f4b9ffd92b0ce3a7d02514ea8d3cbef41a1054 # Parent fd707de25e4fb322a170b4cb43a05a0513b131bf planemo upload for repository https://github.com/abims-sbr/adaptsearch commit b7a3030ea134b5dfad89b1a869db659d72d1145c diff -r fd707de25e4f -r 59f4b9ffd92b scripts/S01_concatenate.py --- a/scripts/S01_concatenate.py Wed Feb 28 06:05:50 2018 -0500 +++ b/scripts/S01_concatenate.py Wed Feb 28 10:39:19 2018 -0500 @@ -3,24 +3,22 @@ ## Last modification: 17/06/2011 ## Subject: find and remove indels - ############################### ##### DEF 0 : Dico fasta ##### ############################### def dico(F2): dicoco = {} - while 1: - next2 = F2.readline() - if not next2: - break - if next2[0] == ">": - fasta_name_query = next2[:-1] - Sn = string.split(fasta_name_query, "||") - fasta_name_query = Sn[0] - next3 = F2.readline() - fasta_seq_query = next3[:-1] - dicoco[fasta_name_query]=fasta_seq_query - return(dicoco) + with open(F2, "r") as file: + for name, query in itertools.izip_longest(*[file]*2): + if not name: + break + if name[0] == ">": + fasta_name_query = name[:-1] + Sn = string.split(fasta_name_query, "||") + fasta_name_query = Sn[0] + fasta_seq_query = query[:-1] + dicoco[fasta_name_query]=fasta_seq_query + return dicoco ################################################################################### @@ -45,10 +43,8 @@ for file in L_IN: nb_locus=nb_locus+1 - ## a ## Open alignments - file_IN = open(file, "r") - dico_seq = dico(file_IN) ### DEF 0 ### - file_IN.close() + ## a ## Open alignments + dico_seq = dico(file) ### DEF 0 ### ## b ## Get alignment length + genes positions for RAxML key0 = dico_seq.keys()[0] ln = len(dico_seq[key0]) @@ -123,7 +119,7 @@ ####################### ##### RUN RUN RUN ##### ####################### -import string, os, time, re, sys, zipfile +import string, os, time, re, sys, itertools list_species = [] SPECIES_ID_LIST = []