Mercurial > repos > p.lucas > taxid_genusexpand_taxid2acc
changeset 6:81acd8138218 draft
Uploaded 13 03 23 Update to the last version of the script
author | p.lucas |
---|---|
date | Mon, 13 Mar 2023 13:57:09 +0000 |
parents | b53e594a8042 |
children | 9238e54d6532 |
files | TAXID_genusexpand_taxid2acc.py |
diffstat | 1 files changed, 19 insertions(+), 13 deletions(-) [+] |
line wrap: on
line diff
--- a/TAXID_genusexpand_taxid2acc.py Mon Mar 13 13:23:31 2023 +0000 +++ b/TAXID_genusexpand_taxid2acc.py Mon Mar 13 13:57:09 2023 +0000 @@ -78,7 +78,7 @@ help="taxid acc_number list in tsv (tabular separated at each line)", metavar="FILE") parser.add_argument("-o", "--acc_out_f", dest='acc_out_f', - help="[optional if --taxid_acc_in_f provided] Output text file with accession numbers from krona_taxid_acc_f NOT found under taxid in ncbi taxonomy tree", + help="[optional if --taxid_acc_in_f provided] Output text file with accession numbers of COMPLETE GENOMES under taxid in ncbi taxonomy tree", metavar="FILE") # parser.add_argument("-r", "--rank", dest='rank', # help="[Optional] default: genus, rank to retain for each acc number provided. We will retain all the acc number descendant from this 'rank' (genus) taxid list", @@ -117,16 +117,22 @@ b_test_add_host_chr_taxids_accnr_from_ori_list) if ((not b_test)and - ((len(sys.argv) < 1) or (len(sys.argv) > 5))): - print("\n".join(["To use this scripts, run:", - "conda activate TAXID_genusexpand_taxid2acc", - "./TAXID_genusexpand_taxid2acc.py --test_all --load_ncbi_tax_f", - " ", - "Then you won't need --test_all --load_ncbi_tax_f options\n\n", - "Then, as an example:\n\n", - ' '.join(['./TAXID_genusexpand_taxid2acc.py', - '-i taxid_accnr_list.tsv', - '-o accnr_out_list.txt']),"\n\n" ])) + ((len(sys.argv) < 2) or (len(sys.argv) > 5))): + print("\n".join([ + "Aim: find accession numbers of complete genomes related to provided taxids.", + "If not found at species level, try at upper taxonomic level until order.", + "Retains only 1 complete genome is several available:", + " - the one with the highest version number, if not sufficient", + " - the one with the highest accession number", + "To use this scripts, run:", + "conda activate TAXID_genusexpand_taxid2acc", + "./TAXID_genusexpand_taxid2acc.py --test_all --load_ncbi_tax_f", + " ", + "Then you won't need --test_all --load_ncbi_tax_f options\n\n", + "Then, as an example:\n\n", + ' '.join(['./TAXID_genusexpand_taxid2acc.py', + '-i taxid_accnr_list.tsv', + '-o accnr_out_list.txt']),"\n\n" ])) parser.print_help() print(prog_tag + "[Error] we found "+str(len(sys.argv)) + " arguments, exit line "+str(frame.f_lineno)) @@ -184,7 +190,6 @@ " file does not exist, line "+ str(sys._getframe().f_lineno) ) cmd = "cut -f 1,2 "+taxid_acc_tabular_f+" | sort | uniq " - # cmd = "cut -f 1 "+taxid_acc_tabular_f+" | sort | uniq " for line in os.popen(cmd).readlines(): k, v = line.rstrip().split() @@ -248,7 +253,8 @@ max_accnr_version = curr_accnr_version kept_accnr_i = i # print(f"record kept_accnr_i:{kept_accnr_i}") - elif curr_accnr_nr > max_accnr_nr: + elif(( curr_accnr_version == max_accnr_version)and + (curr_accnr_nr > max_accnr_nr)): max_accnr_nr = curr_accnr_nr kept_accnr_i = i # print(f"record kept_accnr_i:{kept_accnr_i}")