Mercurial > repos > abims-sbr > cds_search
comparison scripts/S01_find_orf_on_multiple_alignment.py @ 3:ff98ed7849fa draft
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit cf1b9c905931ca2ca25faa4844d45c908756472f
| author | abims-sbr |
|---|---|
| date | Wed, 17 Jan 2018 08:55:29 -0500 |
| parents | 0d2f72caea10 |
| children | 35e39b4128ba |
comparison
equal
deleted
inserted
replaced
| 2:0d2f72caea10 | 3:ff98ed7849fa |
|---|---|
| 6 ## CRITERIA 1 ## Longest part of the sequence alignment without a stop codon "*", tested in the 3 potential ORFs | 6 ## CRITERIA 1 ## Longest part of the sequence alignment without a stop codon "*", tested in the 3 potential ORFs |
| 7 ## CRITERIA 2 ## This longest part should be > 150nc or 50aa | 7 ## CRITERIA 2 ## This longest part should be > 150nc or 50aa |
| 8 ## CRITERIA 3 ## [OPTIONAL] A start codon "M" should be present in this longest part, before the last 50 aa | 8 ## CRITERIA 3 ## [OPTIONAL] A start codon "M" should be present in this longest part, before the last 50 aa |
| 9 ## OUTPUTs "05_CDS_aa" & "05_CDS_nuc" => NOT INCLUDE THIS CRITERIA | 9 ## OUTPUTs "05_CDS_aa" & "05_CDS_nuc" => NOT INCLUDE THIS CRITERIA |
| 10 ## OUTPUTs "06_CDS_with_M_aa" & "06_CDS_with_M_nuc" => INCLUDE THIS CRITERIA | 10 ## OUTPUTs "06_CDS_with_M_aa" & "06_CDS_with_M_nuc" => INCLUDE THIS CRITERIA |
| 11 | |
| 12 | |
| 13 ############################### | |
| 14 ##### DEF 1 : Dico fasta ##### | |
| 15 ############################### | |
| 16 def dico(fasta_file_path): | |
| 17 F2 = open(fasta_file_path, "r") | |
| 18 dicoco = {} | |
| 19 while 1: | |
| 20 next2 = F2.readline() | |
| 21 if not next2: | |
| 22 break | |
| 23 if next2[0] == ">": | |
| 24 fasta_name_query = next2[:-1] | |
| 25 Sn = string.split(fasta_name_query, "||") | |
| 26 fasta_name_query = Sn[0] | |
| 27 next3 = F2.readline() | |
| 28 fasta_seq_query = next3[:-1] | |
| 29 dicoco[fasta_name_query]=fasta_seq_query | |
| 30 F2.close() | |
| 31 return(dicoco) | |
| 32 ############################################################ | |
| 33 | |
| 34 | 11 |
| 35 #################################################### | 12 #################################################### |
| 36 ###### DEF 2 : Create bash for genetic code ######## | 13 ###### DEF 2 : Create bash for genetic code ######## |
| 37 #################################################### | 14 #################################################### |
| 38 ### KEY = codon | 15 ### KEY = codon |
| 341 | 318 |
| 342 ####################### | 319 ####################### |
| 343 ##### RUN RUN RUN ##### | 320 ##### RUN RUN RUN ##### |
| 344 ####################### | 321 ####################### |
| 345 import string, os, time, re, zipfile, sys | 322 import string, os, time, re, zipfile, sys |
| 323 from dico import dico | |
| 346 | 324 |
| 347 infiles = sys.argv[1] | 325 infiles = sys.argv[1] |
| 348 MINIMAL_CDS_LENGTH = int(sys.argv[3]) ## in aa number | 326 MINIMAL_CDS_LENGTH = int(sys.argv[3]) ## in aa number |
| 349 | 327 |
| 350 ## INPUT / OUTPUT | 328 ## INPUT / OUTPUT |
| 380 count_file_with_CDS_plus_M = 0 | 358 count_file_with_CDS_plus_M = 0 |
| 381 | 359 |
| 382 for file in list_file: | 360 for file in list_file: |
| 383 count_file_processed = count_file_processed + 1 | 361 count_file_processed = count_file_processed + 1 |
| 384 fasta_file_path = "./%s" %file | 362 fasta_file_path = "./%s" %file |
| 385 bash_fasta = dico(fasta_file_path) ### DEF 1 ### | 363 fasta_file = open(fasta_file_path, "r") |
| 364 bash_fasta = dico(fasta_file) ### DEF 1 ### | |
| 365 fasta_file.close() | |
| 386 BESTORF_nuc, BESTORF_nuc_CODING, BESTORF_nuc_CDS_with_M, BESTORF_aa, BESTORF_aa_CODING, BESTORF_aa_CDS_with_M = find_good_ORF_criteria_3(bash_fasta, bash_codeUniversel) ### DEF 4 - PART 2 - ### | 366 BESTORF_nuc, BESTORF_nuc_CODING, BESTORF_nuc_CDS_with_M, BESTORF_aa, BESTORF_aa_CODING, BESTORF_aa_CDS_with_M = find_good_ORF_criteria_3(bash_fasta, bash_codeUniversel) ### DEF 4 - PART 2 - ### |
| 387 | 367 |
| 388 ## a ## OUTPUT BESTORF_nuc | 368 ## a ## OUTPUT BESTORF_nuc |
| 389 if BESTORF_nuc != {}: | 369 if BESTORF_nuc != {}: |
| 390 count_file_with_CDS = count_file_with_CDS +1 | 370 count_file_with_CDS = count_file_with_CDS +1 |
