comparison format_fasta_hairpins.py @ 3:ee99c6374a3b draft

"planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/mircounts commit af48e9f6df2717ffd3731a974be1ec36e4eff779"
author artbio
date Fri, 18 Oct 2019 19:18:17 -0400
parents
children
comparison
equal deleted inserted replaced
2:fe1ed513da99 3:ee99c6374a3b
1 import argparse
2 import gzip
3
4
def Parser():
    """Parse the command-line options of the script.

    Three string options are accepted: ``--hairpins_path``, ``--output``
    and ``--basename``. Returns the ``argparse.Namespace`` produced by
    ``parse_args()``; an option that is not given is left at argparse's
    default of None.
    """
    # (flag, help text) pairs; every option is a plain stored string.
    option_help = (
        ('--hairpins_path', "BASE url. ex: /pub/mirbase/22/"),
        ('--output', "parsed hairpin output in fasta format"),
        ('--basename', "genome basename of the parsed fasta"),
    )
    cli = argparse.ArgumentParser()
    for flag, description in option_help:
        cli.add_argument(flag, action="store", type=str, help=description)
    return cli.parse_args()
18
19
def get_fasta_dic(gzipfile):
    '''
    Read a gzip-compressed fasta file into a dictionary.

    gzipfile value example : 'mirbase/22/hairpin.fa.gz'

    Returns a dict mapping the first word of each header line (without
    the leading ">") to the sequence of that record, its lines joined
    into a single string.

    Blank lines are skipped and trailing whitespace (carriage returns
    included) is removed from every line. A record followed by another
    header is only stored when it has at least one sequence line; the
    final record is stored even when its sequence is empty. Input that
    contains no named header yields an empty dict.
    '''
    item_dic = {}
    current_item = ''
    stringlist = []
    with gzip.open(gzipfile, 'rb') as f:
        for raw_line in f:
            line = raw_line.decode('utf-8').rstrip()
            if not line:
                # blank line: nothing to index, and line[0] would fail
                continue
            if line.startswith(">"):
                # dump the sequence of the previous item
                if current_item and stringlist:
                    item_dic[current_item] = "".join(stringlist)
                # take first word of item; a bare ">" has no name at all
                words = line[1:].split()
                current_item = words[0] if words else ''
                stringlist = []
            else:
                stringlist.append(line)
    # for the last item; skipped when no named header was ever seen so
    # that an empty file does not produce a phantom '' entry
    if current_item:
        item_dic[current_item] = "".join(stringlist)
    return item_dic
41
42
def convert_and_print_hairpins(gzipfile, basename, fasta_output):
    """Write the hairpins whose name contains *basename* as a fasta file.

    The records are read from *gzipfile* with get_fasta_dic(). Only the
    names that contain *basename* as a substring are kept; in their
    sequences every 'u' becomes 't' and every 'U' becomes 'T'. The kept
    records are written to *fasta_output*, sorted by name, as a ">name"
    line followed by the sequence on a single line.
    """
    all_hairpins = get_fasta_dic(gzipfile)
    selected = {}
    for name, sequence in all_hairpins.items():
        if basename not in name:
            continue
        # same substitution for both letter cases
        selected[name] = sequence.replace('u', 't').replace('U', 'T')
    with open(fasta_output, "w") as handle:
        for name in sorted(selected):
            handle.write('>%s\n%s\n' % (name, selected[name]))
56
57
def main(hairpins_path, basename, outfile):
    """Entry point: pass the three arguments to convert_and_print_hairpins.

    *hairpins_path* is used as the gzipped fasta input, *basename* as the
    name filter and *outfile* as the path of the fasta file to write.
    """
    convert_and_print_hairpins(
        gzipfile=hairpins_path, basename=basename, fasta_output=outfile)
60
61
if __name__ == "__main__":
    # Script mode only: read the command-line options, then run the
    # conversion with them.
    args = Parser()
    main(hairpins_path=args.hairpins_path,
         basename=args.basename,
         outfile=args.output)