# HG changeset patch # User abims-sbr # Date 1519832211 18000 # Node ID 10f70fea57b3c24ee39374ba1022ef2bd2324bc4 # Parent 948864b6ab4bba2cf4b9ab6eab16a5d2c203c53c planemo upload for repository https://github.com/abims-sbr/adaptsearch commit b7a3030ea134b5dfad89b1a869db659d72d1145c diff -r 948864b6ab4b -r 10f70fea57b3 scripts/S04_find_orf.py --- a/scripts/S04_find_orf.py Wed Feb 28 06:03:31 2018 -0500 +++ b/scripts/S04_find_orf.py Wed Feb 28 10:36:51 2018 -0500 @@ -3,70 +3,62 @@ #python find_ORF.py file output def find_orf(entry): - orf={} - orf_length={} - stop=['TAA','TAG','TGA'] - for i in range(0,3): - pos=i - orf[i]=[0] - while posthreshold or high_minus[1]-high_minus[0]>threshold: - inc+=1 - if high_plus[1]-high_plus[0]>high_minus[1]-high_minus[0]: - file_OUT.write("%s" %name) - file_OUT.write(str(high_plus[1]-high_plus[0]+1)+"\n") - file_OUT.write("%s" %line[high_plus[0]:high_plus[1]+1]) - file_OUT.write("\n") - else: - file_OUT.write("%s" %name) - file_OUT.write(str(high_minus[1]-high_minus[0]+1)+"\n") - file_OUT.write("%s" %reverse[high_minus[0]:high_minus[1]+1]) - file_OUT.write("\n") -f_in.close() -file_OUT.close() - +with open (path_IN, "r") as f_in: + for ignored, line in itertools.izip_longest(*[f_in]*2): + name=">"+path_IN[:2]+str(inc)+"_1/1_1.000_" + high_plus=find_orf(line[:-1]) + reverse=reverse_seq(line[:-1]) + high_minus=find_orf(reverse) + if high_plus[1]-high_plus[0]>threshold or high_minus[1]-high_minus[0]>threshold: + inc+=1 + if high_plus[1]-high_plus[0]>high_minus[1]-high_minus[0]: + file_OUT.write("%s" %name) + file_OUT.write(str(high_plus[1]-high_plus[0]+1)+"\n") + file_OUT.write("%s" %line[high_plus[0]:high_plus[1]+1]) + file_OUT.write("\n") + else: + file_OUT.write("%s" %name) + file_OUT.write(str(high_minus[1]-high_minus[0]+1)+"\n") + file_OUT.write("%s" %reverse[high_minus[0]:high_minus[1]+1]) + file_OUT.write("\n") +file_OUT.close() diff -r 948864b6ab4b -r 10f70fea57b3 scripts/S05_filter.py --- a/scripts/S05_filter.py Wed Feb 28 06:03:31 2018 -0500 +++ b/scripts/S05_filter.py Wed Feb 28 10:36:51 2018 -0500 @@ -2,33 +2,20 @@ #filters the sequences depending on their length after cap3, makes the sequences names compatible with the phylogeny workflow #python filter.py file length_threshold_nucleotides output -import string, os, sys, re +import string, os, sys, re, itertools path_IN = sys.argv[1] -threshold=int(sys.argv[2]) #minimum number of nucleotides for one sequence +threshold = int(sys.argv[2]) #minimum number of nucleotides for one sequence file_OUT = open(sys.argv[3], "w") -f_in = open(path_IN, "r") -inc=1 -while 1: - line=f_in.readline() - if not line: - break - line=f_in.readline() - name=">"+path_IN[:2]+str(inc)+"_1/1_1.000_" - if len(line)-1>threshold-1: - inc+=1 - file_OUT.write("%s" %name) - file_OUT.write(str(len(line)-1)+"\n") - file_OUT.write("%s" %line) -f_in.close() +inc = 1 +with open(path_IN, "r") as f_in: + for ignored, sequence in itertools.izip_longest(*[f_in]*2): + name=">"+path_IN[:2]+str(inc)+"_1/1_1.000_" + if len(sequence)-1>threshold-1: + inc+=1 + file_OUT.write("%s" %name) + file_OUT.write(str(len(sequence)-1)+"\n") + file_OUT.write("%s" %sequence) file_OUT.close() - - -#filtre eventuel sur les petits transcrits - - - - - - +#filtre eventuel sur les petits transcrits \ No newline at end of file