Mercurial > repos > artbio > mircounts
comparison format_fasta_hairpins.py @ 3:ee99c6374a3b draft
"planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/mircounts commit af48e9f6df2717ffd3731a974be1ec36e4eff779"
author | artbio |
---|---|
date | Fri, 18 Oct 2019 19:18:17 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
2:fe1ed513da99 | 3:ee99c6374a3b |
---|---|
1 import argparse | |
2 import gzip | |
3 | |
4 | |
def Parser():
    """Declare the command-line options and return the parsed arguments.

    Three string options are registered: ``--hairpins_path``, ``--output``
    and ``--basename``. None of them is required, so an option that is not
    given on the command line is ``None`` in the returned namespace.
    """
    parser = argparse.ArgumentParser()
    # (flag, help text) pairs; every option is a plain stored string.
    option_specs = (
        ('--hairpins_path', "BASE url. ex: /pub/mirbase/22/"),
        ('--output', "parsed hairpin output in fasta format"),
        ('--basename', "genome basename of the parsed fasta"),
    )
    for flag, help_text in option_specs:
        parser.add_argument(flag, action="store", type=str, help=help_text)
    return parser.parse_args()
18 | |
19 | |
def get_fasta_dic(gzipfile):
    '''
    Parse a gzip-compressed FASTA file into a dictionary.

    gzipfile value example : 'mirbase/22/hairpin.fa.gz'

    Returns a dict whose keys are the first whitespace-delimited word of
    each header line (without the leading ">") and whose values are the
    record's sequence lines joined into a single string.

    Blank lines are ignored and surrounding whitespace (including "\\r" from
    Windows line endings) is stripped from every line. A record is stored
    only when it has both a non-empty name and at least one sequence line;
    this applies to the last record as well, so an empty file yields {}.
    When a name occurs more than once, the last record wins.
    '''
    item_dic = {}
    current_item = ''
    stringlist = []
    with gzip.open(gzipfile, 'rb') as f:
        for raw_line in f:
            line = raw_line.decode('utf-8').strip()
            if not line:
                # blank line: nothing to index, and nothing to add
                continue
            if line.startswith(">"):
                # dump the sequence of the previous item
                if current_item and stringlist:
                    item_dic[current_item] = "".join(stringlist)
                # take first word of item; a bare ">" header has no name
                # and its sequence lines are discarded by the guard above
                words = line[1:].split()
                current_item = words[0] if words else ''
                stringlist = []
            else:
                stringlist.append(line)
    # dump the last item, under the same conditions as the others
    if current_item and stringlist:
        item_dic[current_item] = "".join(stringlist)
    return item_dic
41 | |
42 | |
def convert_and_print_hairpins(gzipfile, basename, fasta_output):
    """Filter hairpin records by name and write them as DNA FASTA.

    The records of ``gzipfile`` are read with ``get_fasta_dic``. Only the
    records whose header name contains ``basename`` as a substring are
    kept. In the kept sequences every 'u' becomes 't' and every 'U' becomes
    'T'; all other characters are left as they are. The result is written
    to ``fasta_output`` (overwritten if it exists), one ">name" line
    followed by one sequence line per record, sorted by name.
    """
    raw_fasta_dict = get_fasta_dic(gzipfile)
    # Case-preserving u -> t conversion done with str.replace rather than
    # rebuilding each sequence character by character.
    parsed_fasta_dict = {
        head: sequence.replace('u', 't').replace('U', 'T')
        for head, sequence in raw_fasta_dict.items()
        if basename in head
    }
    with open(fasta_output, "w") as output:
        for head in sorted(parsed_fasta_dict):
            output.write('>%s\n%s\n' % (head, parsed_fasta_dict[head]))
56 | |
57 | |
def main(hairpins_path, basename, outfile):
    """Run the hairpin conversion for one input file.

    Passes ``hairpins_path`` as the gzip-compressed input, ``basename`` as
    the header filter and ``outfile`` as the FASTA output path to
    ``convert_and_print_hairpins``.
    """
    convert_and_print_hairpins(
        gzipfile=hairpins_path, basename=basename, fasta_output=outfile)
60 | |
61 | |
if __name__ == "__main__":
    # Script entry point: read the command-line options, then convert.
    cli_options = Parser()
    main(hairpins_path=cli_options.hairpins_path,
         basename=cli_options.basename,
         outfile=cli_options.output)