changeset 11:a1dc4c6a0c83 draft

Uploaded
author glogobyte
date Tue, 20 Oct 2020 09:41:27 +0000
parents 6731958cdc38
children 2b09cbf138c8
files pre_mirgene.py
diffstat 1 files changed, 89 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/pre_mirgene.py	Tue Oct 20 09:41:27 2020 +0000
@@ -0,0 +1,89 @@
+import subprocess
+import argparse
+import time
+import urllib.request
+from multiprocessing import Process, Queue
+
+subprocess.call(['mkdir', 'out'])
+parser = argparse.ArgumentParser()
+parser.add_argument("-pos", "--positions", help="", action="store")
+parser.add_argument("-tool_dir", "--tool_directory", help="tool directory path", action="store")
+parser.add_argument("-sym", "--symbol", help="",action="store")
+
+
+args = parser.parse_args()
+
+#=======================================================================================================================================
+
+def read_url1(path):
+
+    response = urllib.request.urlopen(path)
+    data = response.read()
+    data1 = data.decode('utf-8')
+    data1 = data1.split("\n")
+    del data1[-1]
+    q.put(data1)
+
+#===================================================================================================================================
+
+def custom_arms_mirgene(mat_mirna,pri_mirna):
+   mat_ext=[]
+   for i in range(0,len(mat_mirna),2):
+       if args.symbol in mat_mirna[i]:
+          mat_seq=mat_mirna[i+1]
+
+          for j in range(0,len(pri_mirna),2):
+              if mat_mirna[i].split("_")[0] == pri_mirna[j].split("_")[0]:
+                 temp_ext = pri_mirna[j+1].split(mat_seq)
+                 mat_ext.append([mat_mirna[i],temp_ext[0][-int(args.positions):]+mat_seq+temp_ext[1][0:int(args.positions)]])
+
+   print(str(args.positions)+" positions shifted")
+   return(mat_ext)
+
+#===================================================================================================================================
+
+def write_custom_arms(list,name,c):
+
+    f = open(name, "w")
+    for x in list:
+       if c==1:
+          f.write(x[0]+'\n')
+          f.write(x[1]+'\n')
+       else:
+          f.write(x+'\n')
+
+#==================================================================================================================================
+
+if __name__=='__main__':
+
+    starttime = time.time()
+    print(args.symbol)
+    q = Queue()
+
+    mat_url = 'https://mirgenedb.org/fasta/ALL?mat=1'
+    p = [Process(target=read_url1(mat_url))]
+    star_url = 'https://mirgenedb.org/fasta/ALL?star=1'
+    p.extend([Process(target=read_url1(star_url))])
+    pri_url = 'https://mirgenedb.org/static/data/ALL/ALL--pri-30-30.fas'
+    p.extend([Process(target=read_url1(pri_url))])
+    [x.start() for x in p]
+    [x.join() for x in p]
+
+    mat_mirna=q.get()
+    star_mirna=q.get()
+
+    mat_mirna.extend(star_mirna)
+    for i in range(1,len(mat_mirna),2):
+        mat_mirna[i]=mat_mirna[i].replace("U","T")
+    pri_mirna=q.get()
+
+    mat_ext=custom_arms_mirgene(mat_mirna,pri_mirna)
+    p1 = [Process(target=write_custom_arms(mat_mirna,"shifted_mirnas.bed",0))]
+    p1.extend([Process(target=write_custom_arms(pri_mirna,"original_mirnas.bed",0))])
+    p1.extend([Process(target=write_custom_arms(mat_ext,"new_ref.fa",1))])
+    [x.start() for x in p1]
+    [x.join() for x in p1]
+
+    print('That took {} seconds'.format(time.time() - starttime))
+
+