Mercurial > repos > charles_s_test > seqsero2
changeset 7:3d6680af0bec draft
planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
author | charles_s_test |
---|---|
date | Mon, 27 Nov 2017 16:30:27 -0500 |
parents | b6281a377a18 |
children | b04931a9fe71 |
files | SeqSero.py libs/BWA_analysis_H_update_new_family_dependent.py libs/BWA_analysis_O_new_dependent.py libs/H_combination_output_analysis.py libs/Otype_determine_analysis.py libs/__pycache__/Initial_functions.cpython-36.pyc libs/compare_and_change_two_fastq_id.py libs/deletion_compare.py libs/run_auto_All_for_assemblies.py libs/run_auto_All_for_web_multi_revise.py libs/special_gene_test_assemblies.py run_seqsero.py test.txt |
diffstat | 13 files changed, 190 insertions(+), 192 deletions(-) [+] |
line wrap: on
line diff
--- a/SeqSero.py Mon Nov 27 14:37:41 2017 -0500 +++ b/SeqSero.py Mon Nov 27 16:30:27 2017 -0500 @@ -26,11 +26,11 @@ mapping_mode=args.b dataset=args.i if mode_choice=="1": - print dataset[0] + print(dataset[0]) os.system("cp "+dataset[0]+" "+make_dir) os.chdir(make_dir) os.system("python2.7 "+dirpath+"/libs/run_auto_All_for_web_multi_revise.py "+dataset[0].split("/")[-1]+" "+mapping_mode+" 1") - print "\n\n\nResult:\n" + print("\n\n\nResult:\n") os.system("cat Seqsero_result.txt") os.system("rm "+dataset[0].split("/")[-1]) elif mode_choice=="2": @@ -39,21 +39,21 @@ fnameA=dataset[0].split("/")[-1] fnameB=dataset[1].split("/")[-1] os.chdir(make_dir) - print "check fastq id and make them in accordance with each other...please wait..." + print("check fastq id and make them in accordance with each other...please wait...") os.system("python2.7 "+dirpath+"/libs/run_auto_All_for_web_multi_revise.py "+fnameA+" "+mapping_mode+" "+fnameB+" 2") - print "\n\n\nResult:\n" + print("\n\n\nResult:\n") os.system("cat Seqsero_result.txt") elif mode_choice=="3": os.system("cp "+dataset[0]+" "+make_dir) os.chdir(make_dir) os.system("python2.7 "+dirpath+"/libs/run_auto_All_for_web_multi_revise.py "+dataset[0].split("/")[-1]+" "+mapping_mode+" 3") - print "\n\n\nResult:\n" + print("\n\n\nResult:\n") os.system("cat Seqsero_result.txt") elif mode_choice=="4": os.system("cp "+dataset[0]+" "+make_dir) os.chdir(make_dir) os.system("python2.7 "+dirpath+"/libs/run_auto_All_for_assemblies.py "+dataset[0].split("/")[-1]) - print "\n\n\nResult:\n" + print("\n\n\nResult:\n") os.system("cat Seqsero_result.txt") if __name__ == '__main__':
--- a/libs/BWA_analysis_H_update_new_family_dependent.py Mon Nov 27 14:37:41 2017 -0500 +++ b/libs/BWA_analysis_H_update_new_family_dependent.py Mon Nov 27 16:30:27 2017 -0500 @@ -104,23 +104,23 @@ for x in c: if x[:4]=="fljB": fljB[x]=c[x] - final_fliC=sorted(fliC.iteritems(), key=lambda d:d[1], reverse = True) #order from frequency high to low, but tuple while not list - final_fljB=sorted(fljB.iteritems(), key=lambda d:d[1], reverse = True) #order from frequency high to low, but tuple while not list - print "Final_filC_list:" - print final_fliC + final_fliC=sorted(fliC.items(), key=lambda d:d[1], reverse = True) #order from frequency high to low, but tuple while not list + final_fljB=sorted(fljB.items(), key=lambda d:d[1], reverse = True) #order from frequency high to low, but tuple while not list + print("Final_filC_list:") + print(final_fliC) num_1=0#new inserted num_2=0#new inserted if len(final_fliC)>0: #new inserted for x in final_fliC:#new inserted num_1=num_1+x[1]#new inserted - print "Final_fliC_number_together: ",num_1#new inserted - print "Final_fljB_list:" - print final_fljB + print("Final_fliC_number_together: ",num_1)#new inserted + print("Final_fljB_list:") + print(final_fljB) if len(final_fljB)>0: #new inserted for x in final_fljB: #new inserted num_2=num_2+x[1] #new inserted - print "Final_fljB_number_together: ",num_2#new inserted - print "$$Genome:",sra_name + print("Final_fljB_number_together: ",num_2)#new inserted + print("$$Genome:",sra_name) try: fliC_option=final_fliC[0][0].split("_")[1] except: @@ -132,16 +132,16 @@ if z==0: if len(final_fliC)==0 or num_1<=10: - print "$$$No fliC, due to no hit" + print("$$$No fliC, due to no hit") else: if final_fliC[0][1]<=1 and z==1: - print "$$$No fliC, due to the hit reads number is small." + print("$$$No fliC, due to the hit reads number is small.") else: try: family=final_fliC[0][0].split("_")[-1] Sero_list_C.append(family) description.append(final_fliC[0][0]) - print "$$Most possilble fliC family: ",Sero_list_C[0]," Number: ",final_fliC[0][1] + print("$$Most possilble fliC family: ",Sero_list_C[0]," Number: ",final_fliC[0][1]) i=0 for x in final_fliC: if x[0].split("_")[-1] not in Sero_list_C: @@ -153,7 +153,7 @@ if locals().has_key('sec_choice'): Sero_list_C.append(sec_choice) description.append(des) - print "$$Sec possilble fliC family: ",sec_choice," Number: ",number + print("$$Sec possilble fliC family: ",sec_choice," Number: ",number) j=0 for x in final_fliC: if x[0].split("_")[-1] not in Sero_list_C: @@ -165,25 +165,25 @@ if locals().has_key('third_choice'): Sero_list_C.append(third_choice) description.append(des) - print "$$Third possilble fliC family: ",third_choice," Number: ",number + print("$$Third possilble fliC family: ",third_choice," Number: ",number) except: - print "$$$No fliC, or failure of mapping" + print("$$$No fliC, or failure of mapping") try: ratio=float(num_2)/float(num_1) except: ratio=0 if len(final_fljB)==0 or num_2<=5 or ratio<0.15: - print "$$$No fljB, due to no hit" + print("$$$No fljB, due to no hit") else: if final_fljB[0][1]<=1 and z==1: - print "$$$No fljB, due to the hit reads number is small." + print("$$$No fljB, due to the hit reads number is small.") else: try: family=final_fljB[0][0].split("_")[-1] Sero_list_B.append(family) description.append(final_fljB[0][0]) - print "$$Most possilble fljB family: ",Sero_list_B[0]," Number: ",final_fljB[0][1] + print("$$Most possilble fljB family: ",Sero_list_B[0]," Number: ",final_fljB[0][1]) i=0 for x in final_fljB: if x[0].split("_")[-1] not in Sero_list_B: @@ -195,7 +195,7 @@ if locals().has_key('B_sec_choice'): Sero_list_B.append(B_sec_choice) description.append(des) - print "$$Sec possilble fljB: ",B_sec_choice," Number: ",number + print("$$Sec possilble fljB: ",B_sec_choice," Number: ",number) j=0 for x in final_fljB: if x[0].split("_")[-1] not in Sero_list_B: @@ -207,9 +207,9 @@ if locals().has_key('B_third_choice'): Sero_list_B.append(B_third_choice) description.append(des) - print "$$Third possilble fljB: ",B_third_choice," Number: ",number + print("$$Third possilble fljB: ",B_third_choice," Number: ",number) except: - print "$$$No fljB, or failure of mapping" + print("$$$No fljB, or failure of mapping") if len(description)==0: #used for the case which fljB and fliC both has no hit, it will directly cease the function return handle=SeqIO.parse("database/"+database,"fasta")########1/27/2015 @@ -250,10 +250,10 @@ Sero_list_B=[] if listtype=="fliC": if len(fliC_fljB_list)==0: - print "$$No fliC, due to no hit" #because the only possible situation for len(final_fliC)==0 is above (z=0) len(final_fliC)==0, so there is no need to use "$$$" here + print("$$No fliC, due to no hit") #because the only possible situation for len(final_fliC)==0 is above (z=0) len(final_fliC)==0, so there is no need to use "$$$" here else: if fliC_fljB_list[0][1]<=1: - print "$$No fliC, due to the hit reads number is small." #similiar with above, no "$$$" + print("$$No fliC, due to the hit reads number is small.") #similiar with above, no "$$$" else: if fliC_fljB_list[0][0].split("_")[-1]=="g,m": type="fliC" @@ -293,7 +293,7 @@ else: try: Sero_list_C.append(fliC_fljB_list[0][0].split("_")[1]) - print "$$$Most possilble fliC: ",Sero_list_C[0]," Number: ",fliC_fljB_list[0][1] + print("$$$Most possilble fliC: ",Sero_list_C[0]," Number: ",fliC_fljB_list[0][1]) i=0 for x in fliC_fljB_list: if x[0].split("_")[1] not in Sero_list_C: @@ -303,7 +303,7 @@ number=x[1] if locals().has_key('sec_choice'): Sero_list_C.append(sec_choice) - print "$$$Sec possilble fliC: ",sec_choice," Number: ",number + print("$$$Sec possilble fliC: ",sec_choice," Number: ",number) j=0 for x in fliC_fljB_list: if x[0].split("_")[1] not in Sero_list_C: @@ -313,16 +313,16 @@ number=x[1] if locals().has_key('third_choice'): Sero_list_C.append(third_choice) - print "$$$Third possilble fliC: ",third_choice," Number: ",number + print("$$$Third possilble fliC: ",third_choice," Number: ",number) except: - print "$$$No fliC, or failure of mapping (second run)" + print("$$$No fliC, or failure of mapping (second run)") if listtype=="fljB": if len(fliC_fljB_list)==0: - print "$$No fljB, due to no hit" #similiar with above, no "$$$" + print("$$No fljB, due to no hit") #similiar with above, no "$$$" else: if fliC_fljB_list[0][1]<=1: - print "$$No fljB, due to the hit reads number is small." #similiar with above, no "$$$" + print("$$No fljB, due to the hit reads number is small.") #similiar with above, no "$$$" else: if fliC_fljB_list[0][0].split("_")[-1]=="1": type="fljB" @@ -347,7 +347,7 @@ else: try: Sero_list_B.append(fliC_fljB_list[0][0].split("_")[1]) - print "$$$Most possilble fljB: ",Sero_list_B[0]," Number: ",fliC_fljB_list[0][1] + print("$$$Most possilble fljB: ",Sero_list_B[0]," Number: ",fliC_fljB_list[0][1]) i=0 for x in fliC_fljB_list: if x[0].split("_")[1] not in Sero_list_B: @@ -357,7 +357,7 @@ number=x[1] if locals().has_key('B_sec_choice'): Sero_list_B.append(B_sec_choice) - print "$$$Sec possilble fljB: ",B_sec_choice," Number: ",number + print("$$$Sec possilble fljB: ",B_sec_choice," Number: ",number) j=0 for x in fliC_fljB_list: if x[0].split("_")[1] not in Sero_list_B: @@ -367,9 +367,9 @@ number=x[1] if locals().has_key('B_third_choice'): Sero_list_B.append(B_third_choice) - print "$$$Third possilble fljB: ",B_third_choice," Number: ",number + print("$$$Third possilble fljB: ",B_third_choice," Number: ",number) except: - print "$$$No fljB, or failure of mapping (second run)" + print("$$$No fljB, or failure of mapping (second run)") def assembly(type,sra_name,for_fq,rev_fq,for_sai,rev_sai,sam,bam,database,database2,list_length,mapping_mode): @@ -421,18 +421,18 @@ score+=handle[i].alignments[j].hsps[z].bits List_score.append(score) temp=dict(zip(List,List_score)) - Final_list=sorted(temp.iteritems(), key=lambda d:d[1], reverse = True) + Final_list=sorted(temp.items(), key=lambda d:d[1], reverse = True) family=database2.split("_")[2] try: - Final_list[0][0].split("_")[1] # or it will always print "$$$Genome...."(next line) - print "$$$Genome:",sra_name - print "$$$Most possilble "+type+": ",Final_list[0][0].split("_")[1]," Score(due_to_special_test, number changed to score): ",Final_list[0][1] - print Final_list + Final_list[0][0].split("_")[1] # or it will always print("$$$Genome...."(next line) + print("$$$Genome:",sra_name) + print("$$$Most possilble "+type+": ",Final_list[0][0].split("_")[1]," Score(due_to_special_test, number changed to score): ",Final_list[0][1]) + print(Final_list) except: if type=="fliC": - print "$$$There may be no hit for "+type+"_"+family+" family due to the reads not covering core seqeunce, but just based on reads hit number, the most possible one is: ",fliC_option + print("$$$There may be no hit for "+type+"_"+family+" family due to the reads not covering core seqeunce, but just based on reads hit number, the most possible one is: ",fliC_option) if type=="fljB": - print "$$$There may be no hit for "+type+"_"+family+" family due to the reads not covering core seqeunce, but just based on reads hit number, the most possible one is: ",fljB_option + print("$$$There may be no hit for "+type+"_"+family+" family due to the reads not covering core seqeunce, but just based on reads hit number, the most possible one is: ",fljB_option) os.system("rm "+database2+"_vs_"+sam+".xml")###01/28/2015 os.system("rm "+database+sam+"_seq.txt")###01/28/2015 os.system("rm "+database+sam+"_title.txt")###01/28/2015
--- a/libs/BWA_analysis_O_new_dependent.py Mon Nov 27 14:37:41 2017 -0500 +++ b/libs/BWA_analysis_O_new_dependent.py Mon Nov 27 16:30:27 2017 -0500 @@ -64,7 +64,7 @@ for_fq=for_core_id+".fastq" rev_fq=re_core_id+".fastq" dirpath = os.path.abspath(os.path.dirname(os.path.realpath(__file__)))#######03152016 - print "check fastq id and make them in accordance with each other...please wait..." + print("check fastq id and make them in accordance with each other...please wait...") os.system("python "+dirpath+"/compare_and_change_two_fastq_id.py "+for_fq+" "+rev_fq)#######03152016 for_sai=for_core_id+".sai" rev_sai=re_core_id+".sai" @@ -117,8 +117,8 @@ c=dict(zip(a,b)) final_O=sorted(c.iteritems(), key=lambda d:d[1], reverse = True) #order from frequency high to low, but tuple while not list Sero_list_O=[] - print "Final_Otype_list:" - print final_O + print("Final_Otype_list:") + print(final_O) num_1=0#new inserted O9_wbav=0 O310_wzx=0 @@ -140,12 +140,12 @@ O_choice="" - print "$$$Genome:",sra_name + print("$$$Genome:",sra_name) if len(final_O)==0: - print "$$$No Otype, due to no hit" + print("$$$No Otype, due to no hit") else: if final_O[0][1]<8: - print "$$$No Otype, due to the hit reads number is small." + print("$$$No Otype, due to the hit reads number is small.") else: for x in final_O: if x[1]>5: @@ -154,17 +154,17 @@ for x in final_O:# if "sdf" in x[0] and x[1]>3:# qq=0# - print "$$$",x[0],"got a hit, reads:",x[1]# + print("$$$",x[0],"got a hit, reads:",x[1])# if qq!=0:# - print "$$$No sdf exists"# + print("$$$No sdf exists")# if "O-9,46_wbaV" in O_list and float(O9_wbaV)/float(num_1) > 0.1: if "O-9,46_wzy" in O_list and float(O946_wzy)/float(num_1) > 0.1: O_choice="O-9,46" - print "$$$Most possilble Otype: O-9,46" + print("$$$Most possilble Otype: O-9,46") elif "O-9,46,27_partial_wzy" in O_list and float(O94627)/float(num_1) > 0.1: O_choice="O-9,46,27" - print "$$$Most possilble Otype: O-9,46,27" + print("$$$Most possilble Otype: O-9,46,27") else: O_choice="O-9" if file_mode=="3": @@ -176,18 +176,18 @@ elif ("O-3,10_wzx" in O_list) and ("O-9,46_wzy" in O_list) and float(O310_wzx)/float(num_1) > 0.1 and float(O946_wzy)/float(num_1) > 0.1: if "O-3,10_not_in_1,3,19" in O_list and float(O310_no_1319)/float(num_1) > 0.1: O_choice="O-3,10" - print "$$$Most possilble Otype: O-3,10" + print("$$$Most possilble Otype: O-3,10") else: O_choice="O-1,3,19" - print "$$$Most possilble Otype: O-1,3,19" + print("$$$Most possilble Otype: O-1,3,19") else: try: O_choice=final_O[0][0].split("_")[0] if O_choice=="O-1,3,19": O_choice=final_O[1][0].split("_")[0] - print "$$$Most possilble Otype: ",O_choice + print("$$$Most possilble Otype: ",O_choice) except: - print "$$$No suitable Otype, or failure of mapping (please check the quality of raw reads)" + print("$$$No suitable Otype, or failure of mapping (please check the quality of raw reads)") def assembly(sra_name,potential_choice,for_fq,rev_fq,for_sai,rev_sai,sam,bam,mapping_mode): @@ -246,15 +246,15 @@ O2_bigger+=1 except: continue - print "$$$Genome:",sra_name + print("$$$Genome:",sra_name) if O9_bigger>O2_bigger: - print "$$$Most possible Otype is O-9" + print("$$$Most possible Otype is O-9") elif O9_bigger<O2_bigger: - print "$$$Most possible Otype is O-2" + print("$$$Most possible Otype is O-2") else: - print "$$$No suitable one, because can't distinct it's O-9 or O-2, but ",potential_choice," has a more possibility." - print "O-9 number is:",O9_bigger - print "O-2 number is:",O2_bigger + print("$$$No suitable one, because can't distinct it's O-9 or O-2, but ",potential_choice," has a more possibility.") + print("O-9 number is:",O9_bigger) + print("O-2 number is:",O2_bigger) os.system("rm "+sam+"_title.txt")###01/28/2015 os.system("rm "+sam+"_seq.txt")###01/28/2015
--- a/libs/H_combination_output_analysis.py Mon Nov 27 14:37:41 2017 -0500 +++ b/libs/H_combination_output_analysis.py Mon Nov 27 16:30:27 2017 -0500 @@ -225,21 +225,21 @@ combination_score.append(score) combinationlist=dict(zip(combination,combination_score)) #we can do the filteration here final_dict=sorted(combinationlist.iteritems(), key=lambda d:d[1], reverse = True) - print "$$_H:Order:",final_dict + print("$$_H:Order:",final_dict) elif score>100 and fljB_score<100: - print "$$_H:No fljB, only fliC, and its order:",First_Choice,Sec_Choice,Third_Choice + print("$$_H:No fljB, only fliC, and its order:",First_Choice,Sec_Choice,Third_Choice) elif score<100 and fljB_score>100: - print "$$_H:No fliC, only fljB, and its order:",fljB_First_Choice,fljB_Sec_Choice,fljB_Third_Choice + print("$$_H:No fliC, only fljB, and its order:",fljB_First_Choice,fljB_Sec_Choice,fljB_Third_Choice) elif score==1 and fljB_score>100: - print "$$_H:No fliC (file) existed, only fljB, and its order:",fljB_First_Choice,fljB_Sec_Choice,fljB_Third_Choice + print("$$_H:No fliC (file) existed, only fljB, and its order:",fljB_First_Choice,fljB_Sec_Choice,fljB_Third_Choice) elif score==1 and fljB_score<100: - print "$$_H:No fliC (file) existed, and no fljB" + print("$$_H:No fliC (file) existed, and no fljB") elif score>100 and fljB_score==1: - print "$$_H:No fljB (file) existed, only fliC, and its order:",First_Choice,Sec_Choice,Third_Choice + print("$$_H:No fljB (file) existed, only fliC, and its order:",First_Choice,Sec_Choice,Third_Choice) elif score<100 and fljB_score==1: - print "$$_H:No fljB (file) existed, and no fliC" + print("$$_H:No fljB (file) existed, and no fliC") else: - print "$$_H:No fliC and fljB" + print("$$_H:No fliC and fljB") '''
--- a/libs/Otype_determine_analysis.py Mon Nov 27 14:37:41 2017 -0500 +++ b/libs/Otype_determine_analysis.py Mon Nov 27 16:30:27 2017 -0500 @@ -49,17 +49,17 @@ O_2_score=O_2_score+hsp.bits if O_9_score>100: if O_9_score>O_2_score: - print '$$$ Most possible O_type: O-9','\n' - print '$$$ longest_bit_score:',O_9_score,'\n' + print('$$$ Most possible O_type: O-9','\n') + print('$$$ longest_bit_score:',O_9_score,'\n') else: - print '$$$ Most possible O_type: O-2','\n' - print '$$$ longest_bit_score:',O_2_score,'\n' + print('$$$ Most possible O_type: O-2','\n') + print('$$$ longest_bit_score:',O_2_score,'\n') else: - print "Assumpition wrong, no O2 or O9, return to re-analysis" + print("Assumpition wrong, no O2 or O9, return to re-analysis") if ('O-2_' not in Sec_Choice) and ('O-9_' not in Sec_Choice): - print '$$$ Most possible O_type Choice (no tyr difference):',Sec_Choice + print('$$$ Most possible O_type Choice (no tyr difference):',Sec_Choice) if (('O-2_' in Sec_Choice) or ('O-9_' in Sec_Choice)) and ('O-2_' not in Third_Choice) and ('O-9_' not in Third_Choice): - print '$$$ Most possible O_type Choice (no tyr difference):',Third_Choice + print('$$$ Most possible O_type Choice (no tyr difference):',Third_Choice) os.system("rm tyr_of_O2_O9.fasta_db.*")###01/28/2015 os.system("rm "+xml_file)###01/28/2015 if subdatabase=="oafA_of_O4_O5.fasta": @@ -76,17 +76,17 @@ O_2_score=O_2_score+hsp.bits if O_9_score>100: if O_9_score>O_2_score: - print '$$$O5_none_7_base_deletion','\n' - print '$$$ longest_bit_score:',O_9_score,'\n' + print('$$$O5_none_7_base_deletion','\n') + print('$$$ longest_bit_score:',O_9_score,'\n') else: - print '$$$O5-','\n' - print '$$$ longest_bit_score:',O_2_score,'\n' + print('$$$O5-','\n' + print('$$$ longest_bit_score:',O_2_score,'\n') else: - print '$$$O5_none_7_base_deletion,unsure','\n' + print('$$$O5_none_7_base_deletion,unsure','\n') os.system("rm oafA_of_O4_O5.fasta_db.*")###01/28/2015 os.system("rm "+xml_file)###01/28/2015 except: - print "No oafA genes" + print("No oafA genes") if subdatabase=="O_3,10_and_1,3,19_spe.fasta": try: for record in records: #there are many records (i.e. the '>' in query file), so change another method @@ -101,16 +101,16 @@ O_2_score=O_2_score+hsp.bits if O_9_score>200: if O_9_score>O_2_score: - print '$$$O3,10 more possible','\n' - print '$$$ longest_bit_score:',O_9_score,'\n' + print('$$$O3,10 more possible','\n' + print('$$$ longest_bit_score:',O_9_score,'\n' else: if O_2_score>100: - print '$$$O1,3,19 more possible','\n' - print '$$$ longest_bit_score:',O_2_score,'\n' + print('$$$O1,3,19 more possible','\n' + print('$$$ longest_bit_score:',O_2_score,'\n' os.system("rm O_3,10_and_1,3,19_spe.fasta_db.*")###01/28/2015 os.system("rm "+xml_file)###01/28/2015 except: - print "No O3,10_and_O1,3,19 spe sequences" + print("No O3,10_and_O1,3,19 spe sequences" def show_result(): @@ -231,25 +231,25 @@ else: names=First_Choice+Sec_Choice if score==0: - print "$$$ No O_type, due to no hit of rfb" + print("$$$ No O_type, due to no hit of rfb") names="" if 'O-2_' in names and 'O-9_' in names and ('O-2_' in First_Choice or 'O-9_' in First_Choice): - print '#Contain O2 and O9, so change to special test' + print('#Contain O2 and O9, so change to special test') test_O29("tyr_of_O2_O9.fasta") else: if score>0: - print '$$$ Most possible O_type: ',First_Choice,'\n' - print '$$$ Most bit_score:',score,'\n' + print('$$$ Most possible O_type: ',First_Choice,'\n') + print('$$$ Most bit_score:',score,'\n') if "O-4_" in First_Choice:#$$$$$$$ test_O29("oafA_of_O4_O5.fasta")#$$$$$$$ if "O-1,3,19" in First_Choice or "O-3,10" in First_Choice: test_O29("O_3,10_and_1,3,19_spe.fasta")#$$$$$$$ if secscore>0: - print '$$$ Second possible O_type: ',Sec_Choice,'\n' - print '$$$ Second bit_score:',secscore,'\n' + print('$$$ Second possible O_type: ',Sec_Choice,'\n') + print('$$$ Second bit_score:',secscore,'\n') if thirdscore>0: - print '$$$ Third possible O_type: ',Third_Choice,'\n' - print '$$$ Third bit_score:',thirdscore,'\n' + print('$$$ Third possible O_type: ',Third_Choice,'\n' + print('$$$ Third bit_score:',thirdscore,'\n') @@ -261,13 +261,13 @@ target=sys.argv[2] database=sys.argv[3] output=target.split('.')[0]+'_out.fa' -print "$$:",target +print("$$:",target os.system(Makebltdb+' -in '+target+' -out '+target+'_db '+'-dbtype nucl')###01/28/2015 os.system(Blastnpth+' -query '+queries+' -db '+target+'_db '+'-out '+queries+'_vs_'+target+'.xml '+'-outfmt 5')###01/28/2015, since it's abs address for "run_auto*.py", so no need to change "query" address this time xml_file=queries+'_vs_'+target+'.xml' -print '\n' +print('\n' result_handle=open(xml_file) blast_record=NCBIXML.parse(result_handle) blast_record=list(blast_record) @@ -277,13 +277,13 @@ #os.system("rm "+target+'_db.*')###01/28/2015 if len(blast_record)==2: - print 'Hits have been got'+'\n' + print('Hits have been got'+'\n') if len(blast_record[0].alignments)==1 and len(blast_record[1].alignments)==1: - print 'Checking the number of alignments: 2 alignments obtained'+'\n' + print('Checking the number of alignments: 2 alignments obtained'+'\n') if len(blast_record[0].alignments[0].hsps)==1 and len(blast_record[1].alignments[0].hsps)==1: - print 'Checking the number of hsps: each alignment has 1 hsp'+'\n' + print('Checking the number of hsps: each alignment has 1 hsp'+'\n') if blast_record[0].alignments[0].hit_def==blast_record[1].alignments[0].hit_def: - print 'Checking locations of hits: Both hits are located in '+'"'+str(blast_record[0].alignments[0].hit_def)+'"'+'...'+'\n' + print('Checking locations of hits: Both hits are located in '+'"'+str(blast_record[0].alignments[0].hit_def)+'"'+'...'+'\n') hit_1_start=blast_record[0].alignments[0].hsps[0].sbjct_start hit_1_end=blast_record[0].alignments[0].hsps[0].sbjct_end @@ -301,11 +301,11 @@ hit_2_start=hit_2_end hit_2_end=buffer - print 'hit_1_start: '+str(hit_1_start) - print 'hit_1_end: '+str(hit_1_end) + print('hit_1_start: '+str(hit_1_start) + print('hit_1_end: '+str(hit_1_end) - print 'hit_2_start: '+str(hit_2_start) - print 'hit_2_end: '+str(hit_2_end) + print('hit_2_start: '+str(hit_2_start) + print('hit_2_end: '+str(hit_2_end) if hit_1_end<hit_2_start: @@ -315,15 +315,15 @@ extract_start=hit_2_end+1 extract_end=hit_1_start-1 - print 'start: '+str(extract_start), 'end: '+str(extract_end)+'\n' + print('start: '+str(extract_start), 'end: '+str(extract_end)+'\n') for contig in target_seq: if (contig.description==blast_record[0].alignments[0]) or (contig.description.replace(" ","")==blast_record[0].alignments[0].hit_def.replace(" ","")): target_contig=contig rfb_region=target_contig[extract_start:extract_end] - print 'Extracted rfb region length: '+str(len(rfb_region.seq.tostring()))+'\n' - print 'Extracted rfb region saved in: '+output+'\n' + print('Extracted rfb region length: '+str(len(rfb_region.seq.tostring()))+'\n') + print('Extracted rfb region saved in: '+output+'\n') outfile=open(output,'w') title='>'+target.split('.')[0]+' rfb region:'+blast_record[0].alignments[0].hit_def+':'+str(extract_start)+' to '+str(extract_end)+'_'+str(len(rfb_region.seq.tostring()))+'bp'+')' @@ -336,7 +336,7 @@ os.system(Makebltdb+' -in '+database+' -out '+database+'_db '+'-dbtype nucl') os.system(Blastnpth+' -query '+output+' -db '+database+'_db '+'-out '+'Blast_Otype_'+target+'.xml '+'-outfmt 5') xml_file='Blast_Otype_'+target+'.xml' - print '\n' + print('\n' filehandle=open(xml_file) @@ -349,7 +349,7 @@ show_result() else: - print 'Checking locations of hits: the two hits are not located in same contig......'+'\n' + print('Checking locations of hits: the two hits are not located in same contig......'+'\n') hit_1_start=blast_record[0].alignments[0].hsps[0].sbjct_start hit_1_end=blast_record[0].alignments[0].hsps[0].sbjct_end hit_2_start=blast_record[1].alignments[0].hsps[0].sbjct_start @@ -400,7 +400,7 @@ os.system(Makebltdb+' -in '+database+' -out '+database+'_db '+'-dbtype nucl') os.system(Blastnpth+' -query '+output+' -db '+database+'_db '+'-out '+'Blast_Otype_'+target+'.xml '+'-outfmt 5') xml_file='Blast_Otype_'+target+'.xml' - print '\n' + print('\n') filehandle=open(xml_file) @@ -431,7 +431,7 @@ os.system(Blastnpth+' -query combined_sequence.fasta'+' -db '+database+'_db '+'-out '+'Combined_seq_blast_'+target+'.xml '+'-outfmt 5') xml_file='Combined_seq_blast_'+target+'.xml' - print '\n' + print('\n' filehandle=open(xml_file) records=NCBIXML.parse(filehandle) @@ -443,18 +443,18 @@ else: - print '$$$ No O_type result, please check the number of hsps: some alignment have more than 1 hsp (galF or gnd sequences has one more hits in tested genome), that\'s unusual for for our short sequence gnd and galF, please check your submited sequence'+'\n' + print('$$$ No O_type result, please check the number of hsps: some alignment have more than 1 hsp (galF or gnd sequences has one more hits in tested genome), that\'s unusual for for our short sequence gnd and galF, please check your submited sequence'+'\n' elif len(blast_record[0].alignments)>1 and len(blast_record[1].alignments)==1: - print 'The gnd gene is splited on different contigs of your submitted sequence' +'\n' + print('The gnd gene is splited on different contigs of your submitted sequence' +'\n' for record in blast_record: for alignment in record.alignments: if len(alignment.hsps)!=1: - print '$$$ No O_type result, please check the number of hsp: some alignment have more than 1 hsp (galF or gnd sequences has one more hits in tested genome), that\'s unusual for our short sequence gnd and galF, please check your submited sequence'+'\n' + print('$$$ No O_type result, please check the number of hsp: some alignment have more than 1 hsp (galF or gnd sequences has one more hits in tested genome), that\'s unusual for our short sequence gnd and galF, please check your submited sequence'+'\n' break - print 'Each alignment has one hsps'+'\n' + print('Each alignment has one hsps'+'\n' @@ -515,12 +515,12 @@ os.system(Makebltdb+' -in '+database+' -out '+database+'_db '+'-dbtype nucl') os.system(Blastnpth+' -query '+output+' -db '+database+'_db '+'-out '+'Blast_Otype_'+target+'.xml '+'-outfmt 5') - print '\n' + print('\n') xml_file2='Blast_Otype_'+target+'.xml' filehandle=open(xml_file2) records=NCBIXML.parse(filehandle) records=list(records) - print len(records) + print(len(records)) realrecord1=records[0] if len(records[1].alignments)>len(records[0].alignments): realrecord1=records[1] @@ -548,7 +548,7 @@ os.system(Blastnpth+' -query combined_sequence.fasta'+' -db '+database+'_db '+'-out '+'Combined_seq_blast_'+target+'.xml '+'-outfmt 5') xml_file='Combined_seq_blast_'+target+'.xml' - print '\n' + print('\n') filehandle=open(xml_file) @@ -560,14 +560,14 @@ elif len(blast_record[0].alignments)==1 and len(blast_record[1].alignments)>1: - print 'The galF gene is splited on different contigs of your submitted sequence' +'\n' + print('The galF gene is splited on different contigs of your submitted sequence' +'\n') for record in blast_record: for alignment in record.alignments: if len(alignment.hsps)!=1: - print '$$$ No O_type result, please check the number of hsps: some alignment have more than 1 hsp (galF or gnd sequences has one more hits in tested genome), that\'s unusual for our short sequence gnd and galF, please check your submited sequence'+'\n' + print('$$$ No O_type result, please check the number of hsps: some alignment have more than 1 hsp (galF or gnd sequences has one more hits in tested genome), that\'s unusual for our short sequence gnd and galF, please check your submited sequence'+'\n') break - print 'Each alignment has one hsp'+'\n' + print('Each alignment has one hsp'+'\n') outfile=open(output,'w') for alignment in blast_record[0].alignments: @@ -622,7 +622,7 @@ os.system(Makebltdb+' -in '+database+' -out '+database+'_db '+'-dbtype nucl') os.system(Blastnpth+' -query '+output+' -db '+database+'_db '+'-out '+'Blast_Otype_'+target+'.xml '+'-outfmt 5') - print '\n' + print('\n') xml_file='Blast_Otype_'+target+'.xml' filehandle=open(xml_file) records=NCBIXML.parse(filehandle) @@ -654,7 +654,7 @@ os.system(Blastnpth+' -query combined_sequence.fasta'+' -db '+database+'_db '+'-out '+'Combined_seq_blast_'+target+'.xml '+'-outfmt 5') xml_file='Combined_seq_blast_'+target+'.xml' - print '\n' + print('\n') filehandle=open(xml_file) @@ -665,14 +665,14 @@ elif len(blast_record[0].alignments)>1 and len(blast_record[1].alignments)>1: - print 'The gnd and galF gene are both splited on different contigs of your submitted sequence' +'\n' + print('The gnd and galF gene are both splited on different contigs of your submitted sequence' +'\n') for record in blast_record: for alignment in record.alignments: if len(alignment.hsps)!=1: - print '$$$ No O_type result, please check the number of hsp: some alignment have more than 1 hsp (galF or gnd sequences has one more hits in tested genome), that\'s unusual for our short sequence gnd and galF, please check your submited sequence'+'\n' + print('$$$ No O_type result, please check the number of hsp: some alignment have more than 1 hsp (galF or gnd sequences has one more hits in tested genome), that\'s unusual for our short sequence gnd and galF, please check your submited sequence'+'\n') break - print 'Each alignment has one hsps'+'\n' + print('Each alignment has one hsps'+'\n') outfile=open(output,'w') for alignment in blast_record[0].alignments: @@ -728,7 +728,7 @@ os.system(Makebltdb+' -in '+database+' -out '+database+'_db '+'-dbtype nucl') os.system(Blastnpth+' -query '+output+' -db '+database+'_db '+'-out '+'Blast_Otype_'+target+'.xml '+'-outfmt 5') xml_file='Blast_Otype_'+target+'.xml' - print '\n' + print('\n') filehandle=open(xml_file) records=NCBIXML.parse(filehandle) records=list(records) @@ -759,7 +759,7 @@ os.system(Blastnpth+' -query combined_sequence.fasta'+' -db '+database+'_db '+'-out '+'Combined_seq_blast_'+target+'.xml '+'-outfmt 5') xml_file='Combined_seq_blast_'+target+'.xml' - print '\n' + print('\n') filehandle=open(xml_file) @@ -771,7 +771,7 @@ else: - print '$$$ $$$ No O_type result, Attention: unusual number of hits, no hits for galF or gnd! Check blast output...'+'\n' + print('$$$ $$$ No O_type result, Attention: unusual number of hits, no hits for galF or gnd! Check blast output...'+'\n') os.system('rm '+target+'_db.'+'*')
--- a/libs/compare_and_change_two_fastq_id.py Mon Nov 27 14:37:41 2017 -0500 +++ b/libs/compare_and_change_two_fastq_id.py Mon Nov 27 16:30:27 2017 -0500 @@ -17,11 +17,11 @@ if a_title==b_title: pass else: - print "changing the title of two seperated fastq files..." - print a_title,b_title + print("changing the title of two seperated fastq files...") + print(a_title,b_title) os.system("sed "+"-i 's/.1 / /g' "+file1) - print "finished file1" + print("finished file1") os.system("sed "+"-i 's/.2 / /g' "+file2) - print "finished file2" + print("finished file2") compare_and_change_two_fastq_id(file1,file2) \ No newline at end of file
--- a/libs/deletion_compare.py Mon Nov 27 14:37:41 2017 -0500 +++ b/libs/deletion_compare.py Mon Nov 27 16:30:27 2017 -0500 @@ -120,15 +120,15 @@ O2_bigger+=1 except: continue - print "$$$Genome:",sra_name + print("$$$Genome:",sra_name) if O9_bigger>O2_bigger: - print "$$$Typhimurium" + print("$$$Typhimurium") elif O9_bigger<O2_bigger: - print "$$$Typhimurium_O5-" + print("$$$Typhimurium_O5-") else: - print "$$$Typhimurium, even no 7 bases difference" - print "O-4 number is:",O9_bigger - print "O-4_5- number is:",O2_bigger + print("$$$Typhimurium, even no 7 bases difference") + print("O-4 number is:",O9_bigger) + print("O-4_5- number is:",O2_bigger) os.system("rm "+sam+"_title.txt")###01/28/2015 os.system("rm "+sam+"_seq.txt")###01/28/2015 os.system("rm "+sam+".fasta")###01/28/2015
--- a/libs/run_auto_All_for_assemblies.py Mon Nov 27 14:37:41 2017 -0500 +++ b/libs/run_auto_All_for_assemblies.py Mon Nov 27 16:30:27 2017 -0500 @@ -51,7 +51,7 @@ real_file=file2 except: real_file=file1 - #print "###The genome name:",file1 + #print("###The genome name:",file1 dirpath = os.path.abspath(os.path.dirname(os.path.realpath(__file__)))###01/27/2015 os.system('touch result.txt') database_path="database"###01/27/2015 @@ -81,7 +81,7 @@ elif O1_3_19=="+": Otype="1,3,19" else: - print "No_O3,10_O1,3,19_spe_sequences" + print("No_O3,10_O1,3,19_spe_sequences") os.system('python '+dirpath+'/H_combination_output_analysis.py '+real_file+' '+database_path+'/H_new_fliC_protein_database.fasta '+database_path+'/H_new_fljB_protein_database.fasta >temp_result_'+str(q)+'H.txt') os.system('cat temp_result_'+str(q)+'H.txt>>data_log.txt') handle2=open('temp_result_'+str(q)+'H.txt',"r") @@ -166,18 +166,18 @@ if len(seronames)>1: star="*" star_line="The predicted serotypes share the same general formula:\t"+Otype+":"+fliC+":"+fljB+"\n"## - #print "$$$The most possible formula is: (by the order O:H1:H2) ",Otype,":",fliC,":",fljB - #print "$$$The possible serotyes are:",seronames + #print("$$$The most possible formula is: (by the order O:H1:H2) ",Otype,":",fliC,":",fljB + #print("$$$The possible serotyes are:",seronames m=0 for y in seronames: if y in file1: - #print "$$$ Is the judgement true? Answer:Yes!" #here we use file1, because we want ":", while file2 turned it to "__" + #print("$$$ Is the judgement true? Answer:Yes!" #here we use file1, because we want ":", while file2 turned it to "__" answer="Yes" m=1 if m==0: - #print "$$$ Is the judgement true? Answer: Need to check the records and file names" + #print("$$$ Is the judgement true? Answer: Need to check the records and file names" answer="Not sure" - print "\n","\n" + print("\n","\n") predict_form=Otype+":"+fliC+":"+fljB predict_sero=(" or ").join(seronames) if predict_form=="9:g,m:-":# @@ -250,12 +250,12 @@ txt_names.append(file_names[j+i].replace(' ','_').replace(":","__").replace("[","").replace("]","")+".txt") print txt_names for j in xrange(len(txt_names)): - print i,"and",j - print i+j+1 + print(i,"and",j) + print(i+j+1) file=open(txt_names[j],"r") handle=list(file) b=handle[0].split("\t") - print b + print(b) sheet.write(i+j+1,0,b[0]) sheet.write(i+j+1,1,b[1]) sheet.write(i+j+1,2,b[2]) @@ -265,7 +265,7 @@ sheet.write(i+j+1,6,b[6]) sheet.write(i+j+1,7,b[7]) - print "End time,",time.time() + print("End time,",time.time()) file3.save("Seqsero_result2.xls") '''
--- a/libs/run_auto_All_for_web_multi_revise.py Mon Nov 27 14:37:41 2017 -0500 +++ b/libs/run_auto_All_for_web_multi_revise.py Mon Nov 27 16:30:27 2017 -0500 @@ -63,7 +63,7 @@ additional_file=file2 except: pass - #print "###The genome name:",file1 + #print("###The genome name:",file1 dirpath = os.path.abspath(os.path.dirname(os.path.realpath(__file__)))###01/27/2015 os.system('touch result.txt') database_path="database"###01/27/2015, because we add one directory to the result directory @@ -176,18 +176,18 @@ if len(seronames)>1: star="*" star_line="The predicted serotypes share the same general formula:\t"+Otype+":"+fliC+":"+fljB+"\n"## - #print "$$$The most possible formula is: (by the order O:H1:H2) ",Otype,":",fliC,":",fljB - #print "$$$The possible serotyes are:",seronames + #print("$$$The most possible formula is: (by the order O:H1:H2) ",Otype,":",fliC,":",fljB + #print("$$$The possible serotyes are:",seronames m=0 for y in seronames: if y in file1: - #print "$$$ Is the judgement true? Answer:Yes!" #here we use file1, because we want ":", while file2 turned it to "__" + #print("$$$ Is the judgement true? Answer:Yes!" #here we use file1, because we want ":", while file2 turned it to "__" answer="Yes" m=1 if m==0: - #print "$$$ Is the judgement true? Answer: Need to check the records and file names" + #print("$$$ Is the judgement true? Answer: Need to check the records and file names" answer="Not sure" - print "\n","\n" + print("\n","\n") predict_form=Otype+":"+fliC+":"+fljB# predict_sero=(" or ").join(seronames)# if predict_form=="9:g,m:-":# @@ -299,16 +299,16 @@ for j in xrange(len(jobs)): jobs[j].join() txt_names.append(file_names[j+i].replace(' ','_').replace(":","__").replace("[","").replace("]","")+".txt") - print txt_names + print(txt_names) for j in xrange(len(txt_names)): - print i,"and",j - print i+j+1 + print(i,"and",j) + print(i+j+1) file=open(txt_names[j],"r") handle=list(file) b=handle[0].split("\t") - print b + print(b) - print "End time,",time.time() + print("End time,",time.time()) ''' if __name__ == '__main__':
--- a/libs/special_gene_test_assemblies.py Mon Nov 27 14:37:41 2017 -0500 +++ b/libs/special_gene_test_assemblies.py Mon Nov 27 16:30:27 2017 -0500 @@ -31,7 +31,7 @@ for record in records: for alignment in record.alignments: if x in alignment.hit_def: #multi gene database, so... - print x,"got a hit, evaluating the hit quality..." + print(x,"got a hit, evaluating the hit quality...") score=0 for hsp in alignment.hsps: if hsp.expect<E_thresh: @@ -46,9 +46,9 @@ First_Choice=Htype score=scorelist[Htype] if float(score)>=0.1*aver_len: - print "$$$",First_Choice,"got a hit, score:",score + print("$$$",First_Choice,"got a hit, score:",score) else: - print "$$$No ",x,"exists" + print("$$$No ",x,"exists") os.system("rm "+database+"_db.*")##########1/28/2015 os.system("rm "+xml_file)##########1/28/2015
--- a/run_seqsero.py Mon Nov 27 14:37:41 2017 -0500 +++ b/run_seqsero.py Mon Nov 27 16:30:27 2017 -0500 @@ -9,7 +9,7 @@ # paths to files and scripts fileDir = os.path.dirname(os.path.realpath('__file__')) -print fileDir +print(fileDir) fq_list1 = [] Tool_directory = sys.argv[2] @@ -31,7 +31,7 @@ -print Tool_directory + ' path printed ' +print(Tool_directory + ' path printed ') test_out.write(str(len(sys.argv))+"\n") if len(sys.argv) >= 2: @@ -55,7 +55,7 @@ ''' Creates dict with runs as keys and list with filenames as values. ''' -# print "monkey" +# print("monkey") run2fastqs = {} for file in fastq_files: run = '' @@ -83,9 +83,9 @@ else: run2fastqs[run] = [new_path_file] except IOError: - print "Data not found. It is possible for a deleted file to still be listed "\ + print("Data not found. It is possible for a deleted file to still be listed "\ "in a Galaxy library. Please confirm that the data still exists on this "\ - "server. You may need to upload it again." + "server. You may need to upload it again.") return run2fastqs def run_seqsero(run2fastqs): @@ -115,7 +115,7 @@ for line in sample: # line is actually the entire seqsero output. line = str(line) linel = re.split("\n", line) -# print linel +# print(linel) #lines_used = [] for element in linel: # element is a line of seqsero output. element = element.rstrip("\n") @@ -144,7 +144,7 @@ fastq2serotype[fastqs][4] = elementl[1] # add predicted serotype if element not in lines_used and re.search("\w", fastqs) and len(element) > 7: fastq2comment[fastqs].append(element) - #print "\n" + #print("\n" return fastq2serotype, fastq2comment def print_html(fastq2serotype, fastq2comment): @@ -170,9 +170,9 @@ html_out.write('<td>'+element+'</td>\n') html_out.write('</tr>\n') tab_out.write(header+"\n") - print "\n\n", header + print("\n\n", header) for fastq in fastq2serotype: -# print fastq, fastq2serotype[fastq] +# print(fastq, fastq2serotype[fastq]) line_to_print = fastq+'\t'+"\t".join(fastq2serotype[fastq]) tab_out.write(line_to_print+"\n") html_out.write('<tr>\n') @@ -180,9 +180,9 @@ for antigen in fastq2serotype[fastq]: html_out.write('<td>'+antigen+'</td>\n') html_out.write('</tr>\n') - print line_to_print + print(line_to_print) html_out.write('</table>\n') - print "\n" + print("\n") for fastq in fastq2comment: tab_out.write("\n"+fastq+"\n") html_out.write('<tr>\n') @@ -192,8 +192,8 @@ #if len(line) > 7: html_out.write('<td>'+line+'</td>\n') tab_out.write(line+"\n") - print line - print "\n" + print(line) + print("\n") html_out.write('</p>\n') html_out.write('</tr>\n') html_out.write('</body>\n') @@ -203,7 +203,7 @@ def readFile(filename): filehandle = open(filename) - print filehandle.read() + print(filehandle.read()) filehandle.close()
--- a/test.txt Mon Nov 27 14:37:41 2017 -0500 +++ b/test.txt Mon Nov 27 16:30:27 2017 -0500 @@ -3,14 +3,12 @@ /galaxydir/galaxy/tools/GalaxySeqsero/run_seqsero.py /galaxydir/galaxy/database/files/000/dataset_1.dat,/galaxydir/galaxy/database/files/000/dataset_2.dat /galaxydir/galaxy/tools/GalaxySeqsero/ 3 2 - - - +('\n', '\n') Input files: dataset_1_SRR1198853.fastq dataset_2_SRR1198853.fastq -O antigen prediction: O-- -H1 antigen prediction(fliC): - -H2 antigen prediction(fljB): - -Predicted antigenic profile: -:-:- +O antigen prediction: O-? +H1 antigen prediction(fliC): k +H2 antigen prediction(fljB): 1,5 +Predicted antigenic profile: ?:k:1,5 Predicted serotype(s): N/A (The predicted antigenic profile does not exist in the White-Kauffmann-Le Minor scheme) check fastq id and make them in accordance with each other...please wait... @@ -21,4 +19,4 @@ None -Mon Nov 27 09:01:14 2017 \ No newline at end of file +Mon Nov 27 16:25:00 2017 \ No newline at end of file