annotate get_orfs_or_cdss.py @ 0:d5c3354c166d draft default tip

Uploaded
author cpt_testbed
date Fri, 29 Apr 2022 10:33:36 +0000
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
d5c3354c166d Uploaded
cpt_testbed
parents:
diff changeset
1 #!/usr/bin/env python
d5c3354c166d Uploaded
cpt_testbed
parents:
diff changeset
2 import argparse
d5c3354c166d Uploaded
cpt_testbed
parents:
diff changeset
3 import logging
d5c3354c166d Uploaded
cpt_testbed
parents:
diff changeset
4 from cpt import OrfFinder
d5c3354c166d Uploaded
cpt_testbed
parents:
diff changeset
5
d5c3354c166d Uploaded
cpt_testbed
parents:
diff changeset
logging.basicConfig()
log = logging.getLogger()


def build_parser():
    """Build the command-line parser for the ORF/CDS extraction script.

    Returns:
        argparse.ArgumentParser: parser exposing the input sequence file,
        the search options (translation table, feature type, end handling,
        mode, minimum length, strand) and the four output files.
    """
    parser = argparse.ArgumentParser(description="Get open reading frames")
    parser.add_argument("fasta_file", type=argparse.FileType("r"), help="Fasta file")

    parser.add_argument(
        "-f",
        "--format",
        dest="seq_format",
        default="fasta",
        help="Sequence format (e.g. fasta, fastq, sff)",
    )
    parser.add_argument(
        "--table", dest="table", default=1, help="NCBI Translation table", type=int
    )
    parser.add_argument(
        "-t",
        "--ftype",
        dest="ftype",
        choices=("CDS", "ORF"),
        default="ORF",
        help="Find ORF or CDSs",
    )
    parser.add_argument(
        "-e",
        "--ends",
        dest="ends",
        choices=("open", "closed"),
        default="closed",
        help="Open or closed. Closed ensures start/stop codons are present",
    )
    parser.add_argument(
        "-m",
        "--mode",
        dest="mode",
        choices=("all", "top", "one"),
        default="all",
        help="Output all ORFs/CDSs from sequence, all ORFs/CDSs "
        "with max length, or first with maximum length",
    )
    parser.add_argument(
        "--min_len", dest="min_len", default=10, help="Minimum ORF/CDS length", type=int
    )
    parser.add_argument(
        "--strand",
        dest="strand",
        choices=("both", "forward", "reverse"),
        default="both",
        help="select strand",
    )

    parser.add_argument(
        "--on",
        dest="out_nuc",
        type=argparse.FileType("w"),
        default="out.fna",
        help="Output nucleotide sequences",
    )
    parser.add_argument(
        "--op",
        dest="out_prot",
        type=argparse.FileType("w"),
        default="out.fa",
        help="Output protein sequences",
    )
    parser.add_argument(
        "--ob",
        dest="out_bed",
        type=argparse.FileType("w"),
        default="out.bed",
        help="Output BED file",
    )
    parser.add_argument(
        "--og",
        dest="out_gff3",
        type=argparse.FileType("w"),
        default="out.gff3",
        help="Output GFF3 file",
    )
    parser.add_argument("-v", action="version", version="0.3.0")
    return parser


def close_handles(handles):
    """Close every file object in *handles*, leaving stdin/stdout open.

    argparse.FileType hands back sys.stdin / sys.stdout when the user passes
    "-", and those streams must stay usable for the rest of the process.
    Closing an already-closed file is a no-op, so this is safe even if the
    consumer of the handles closed them itself.
    """
    import sys

    for handle in handles:
        if handle is sys.stdin or handle is sys.stdout:
            continue
        handle.close()


def main(argv=None):
    """Parse *argv* (default: sys.argv[1:]) and run the ORF/CDS finder.

    The file handles opened by argparse are always closed on the way out,
    whether the finder succeeds or raises.
    """
    args = build_parser().parse_args(argv)
    handles = (
        args.fasta_file,
        args.out_nuc,
        args.out_prot,
        args.out_bed,
        args.out_gff3,
    )
    try:
        # NOTE(review): args.seq_format (--format) and args.mode (--mode) are
        # parsed but never forwarded to OrfFinder here -- confirm whether
        # OrfFinder is expected to receive them.
        of = OrfFinder(args.table, args.ftype, args.ends, args.min_len, args.strand)
        of.locate(
            args.fasta_file, args.out_nuc, args.out_prot, args.out_bed, args.out_gff3
        )
    finally:
        close_handles(handles)


if __name__ == "__main__":
    main()