annotate pre_mirgene.py @ 11:a1dc4c6a0c83 draft

Uploaded
author glogobyte
date Tue, 20 Oct 2020 09:41:27 +0000
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
11
a1dc4c6a0c83 Uploaded
glogobyte
parents:
diff changeset
import argparse
import os
import subprocess
import time
import urllib.request
from multiprocessing import Process, Queue
a1dc4c6a0c83 Uploaded
glogobyte
parents:
diff changeset
6
a1dc4c6a0c83 Uploaded
glogobyte
parents:
diff changeset
# Create the 'out' directory without shelling out; exist_ok keeps re-runs in an
# already-populated working directory from failing.
# NOTE(review): nothing in this script writes into out/ -- presumably the
# calling wrapper expects it to exist; confirm before removing.
os.makedirs('out', exist_ok=True)

# Command-line interface. The parsed result is stored in the module-level
# `args`, which the functions below read as a global.
parser = argparse.ArgumentParser()
parser.add_argument(
    "-pos", "--positions",
    type=int,
    required=True,
    help="number of flanking bases taken from the precursor on each side of the sequence",
    action="store",
)
parser.add_argument("-tool_dir", "--tool_directory", help="tool directory path", action="store")
parser.add_argument(
    "-sym", "--symbol",
    required=True,
    help="text that a FASTA header must contain for its sequence to be extended",
    action="store",
)

args = parser.parse_args()
a1dc4c6a0c83 Uploaded
glogobyte
parents:
diff changeset
15
a1dc4c6a0c83 Uploaded
glogobyte
parents:
diff changeset
16 #=======================================================================================================================================
a1dc4c6a0c83 Uploaded
glogobyte
parents:
diff changeset
17
a1dc4c6a0c83 Uploaded
glogobyte
parents:
diff changeset
def read_url1(path, queue=None):
    """Download a text resource and put its lines on a queue.

    The payload is decoded as UTF-8 and split on "\\n". A single empty
    trailing element (produced when the payload ends with a newline) is
    dropped; a final line without a terminating newline is kept.

    Args:
        path: URL understood by ``urllib.request.urlopen``.
        queue: object with a ``put`` method that receives the list of lines.
            When omitted, the module-level ``q`` is used.
    """
    # The context manager guarantees the connection is released even if
    # reading or decoding fails.
    with urllib.request.urlopen(path) as response:
        text = response.read().decode('utf-8')

    lines = text.split("\n")
    # Only strip the artefact of a trailing newline, never real data.
    if lines and lines[-1] == "":
        del lines[-1]

    target = q if queue is None else queue
    target.put(lines)
a1dc4c6a0c83 Uploaded
glogobyte
parents:
diff changeset
26
a1dc4c6a0c83 Uploaded
glogobyte
parents:
diff changeset
27 #===================================================================================================================================
a1dc4c6a0c83 Uploaded
glogobyte
parents:
diff changeset
28
a1dc4c6a0c83 Uploaded
glogobyte
parents:
diff changeset
def custom_arms_mirgene(mat_mirna, pri_mirna, symbol=None, positions=None):
    """Extend selected sequences with flanking bases taken from their precursors.

    Both inputs are flat lists that alternate header line, sequence line.
    An entry of ``mat_mirna`` is processed when ``symbol`` occurs in its
    header. It is paired with every ``pri_mirna`` entry whose header shares
    the same text before the first "_". For each pair, the sequence is
    located inside the precursor and widened by up to ``positions`` bases on
    either side.

    Args:
        mat_mirna: alternating headers and sequences to extend.
        pri_mirna: alternating headers and precursor sequences.
        symbol: text a header must contain to be processed; defaults to
            ``args.symbol``.
        positions: number of flanking bases per side; defaults to
            ``args.positions``.

    Returns:
        A list of ``[header, extended_sequence]`` pairs, in input order.

    Raises:
        ValueError: if ``positions`` is negative.
    """
    if symbol is None:
        symbol = args.symbol
    if positions is None:
        positions = args.positions
    flank = int(positions)
    if flank < 0:
        raise ValueError("positions must be non-negative")

    # Index precursor sequences by header prefix once, instead of rescanning
    # the whole precursor list for every selected entry.
    pri_by_prefix = {}
    for pri_header, pri_seq in zip(pri_mirna[0::2], pri_mirna[1::2]):
        pri_by_prefix.setdefault(pri_header.split("_")[0], []).append(pri_seq)

    mat_ext = []
    for header, mat_seq in zip(mat_mirna[0::2], mat_mirna[1::2]):
        if symbol not in header:
            continue

        for pri_seq in pri_by_prefix.get(header.split("_")[0], ()):
            # partition() splits at the first occurrence only, so the right
            # flank stays intact even if the sequence appears more than once.
            left, found, right = pri_seq.partition(mat_seq)
            if not found:
                print("Warning: sequence of " + header + " not found in its precursor, skipped")
                continue

            # Slice from an explicit start index: left[-0:] would return the
            # whole string rather than an empty flank when flank == 0.
            upstream = left[max(len(left) - flank, 0):]
            mat_ext.append([header, upstream + mat_seq + right[:flank]])

    print(str(positions) + " positions shifted")
    return mat_ext
a1dc4c6a0c83 Uploaded
glogobyte
parents:
diff changeset
42
a1dc4c6a0c83 Uploaded
glogobyte
parents:
diff changeset
43 #===================================================================================================================================
a1dc4c6a0c83 Uploaded
glogobyte
parents:
diff changeset
44
a1dc4c6a0c83 Uploaded
glogobyte
parents:
diff changeset
def write_custom_arms(list, name, c):
    """Write records to a text file, one item per line.

    Args:
        list: records to write. When ``c == 1`` each record is a pair whose
            two elements are written on consecutive lines; otherwise each
            record is a single string written on its own line.
            (The parameter name shadows the builtin but is kept so existing
            callers are unaffected.)
        name: path of the file to create or overwrite.
        c: layout selector, see ``list``.
    """
    # The context manager flushes and closes the file even if a write fails.
    with open(name, "w") as f:
        if c == 1:
            for x in list:
                f.write(x[0] + '\n')
                f.write(x[1] + '\n')
        else:
            for x in list:
                f.write(x + '\n')
a1dc4c6a0c83 Uploaded
glogobyte
parents:
diff changeset
54
a1dc4c6a0c83 Uploaded
glogobyte
parents:
diff changeset
55 #==================================================================================================================================
a1dc4c6a0c83 Uploaded
glogobyte
parents:
diff changeset
56
a1dc4c6a0c83 Uploaded
glogobyte
parents:
diff changeset
if __name__ == '__main__':

    starttime = time.time()
    print(args.symbol)

    # read_url1 delivers its result through the module-level queue `q`,
    # so the queue has to exist under exactly this name before any fetch.
    q = Queue()

    mat_url = 'https://mirgenedb.org/fasta/ALL?mat=1'
    star_url = 'https://mirgenedb.org/fasta/ALL?star=1'
    pri_url = 'https://mirgenedb.org/static/data/ALL/ALL--pri-30-30.fas'

    # The downloads are called directly and in order. Wrapping them as
    # Process(target=read_url1(url)) would evaluate read_url1(url) right here
    # in the parent and give Process a target of None, i.e. a child that does
    # nothing. Fetching and reading back one at a time also keeps it
    # unambiguous which queue item belongs to which URL.
    read_url1(mat_url)
    mat_mirna = q.get()
    read_url1(star_url)
    star_mirna = q.get()
    read_url1(pri_url)
    pri_mirna = q.get()

    # Merge both lists, then convert only the sequence lines (odd indices)
    # from the RNA to the DNA alphabet; header lines are left untouched.
    mat_mirna.extend(star_mirna)
    for i in range(1, len(mat_mirna), 2):
        mat_mirna[i] = mat_mirna[i].replace("U", "T")

    mat_ext = custom_arms_mirgene(mat_mirna, pri_mirna)

    # Written directly for the same reason the downloads are called directly.
    # NOTE(review): "shifted_mirnas.bed" receives the merged, unextended
    # records and "original_mirnas.bed" the precursors, both in FASTA layout;
    # the names are kept because callers may rely on them.
    write_custom_arms(mat_mirna, "shifted_mirnas.bed", 0)
    write_custom_arms(pri_mirna, "original_mirnas.bed", 0)
    write_custom_arms(mat_ext, "new_ref.fa", 1)

    print('That took {} seconds'.format(time.time() - starttime))
a1dc4c6a0c83 Uploaded
glogobyte
parents:
diff changeset
88
a1dc4c6a0c83 Uploaded
glogobyte
parents:
diff changeset
89