| 0 | 1 from itertools import groupby | 
|  | 2 import sys | 
|  | 3 import subprocess | 
|  | 4 import argparse | 
|  | 5 import time | 
|  | 6 import urllib | 
|  | 7 from multiprocessing import Process, Queue | 
|  | 8 import itertools | 
|  | 9 | 
|  | 10 subprocess.call(['mkdir', 'out']) | 
|  | 11 parser = argparse.ArgumentParser() | 
|  | 12 | 
|  | 13 parser.add_argument("-pos", "--positions", help="", action="store") | 
|  | 14 parser.add_argument("-tool_dir", "--tool_directory", help="tool directory path", action="store") | 
|  | 15 parser.add_argument("-gen", "--genome", help="tool directory path", action="store") | 
|  | 16 parser.add_argument("-gff3", "--gff", help="",action="store") | 
|  | 17 | 
|  | 18 args = parser.parse_args() | 
|  | 19 | 
|  | 20 #======================================================================================================================================= | 
|  | 21 | 
|  | 22 | 
|  | 23 #-----------------------Download and read the file hsa.gff3--------------------------------- | 
|  | 24 | 
|  | 25 def read_url(q): | 
|  | 26 | 
|  | 27 | 
|  | 28     url = 'ftp://mirbase.org/pub/mirbase/CURRENT/genomes/'+args.gff | 
|  | 29     #url = 'ftp://mirbase.org/pub/mirbase/21/genomes/hsa.gff3' | 
|  | 30     response = urllib.urlopen(url) | 
|  | 31     data = response.read() | 
|  | 32     file_mirna = data.decode('utf-8') | 
|  | 33     file_mirna = file_mirna.split("\n") | 
|  | 34     q.put(file_mirna) | 
|  | 35 | 
|  | 36 | 
|  | 37 def write_gff(file_mirna): | 
|  | 38     f = open('original_mirnas.bed', "w") | 
|  | 39 | 
|  | 40     for i in range(len(file_mirna)): | 
|  | 41         f.write(file_mirna[i] + "\n") | 
|  | 42 | 
|  | 43 | 
|  | 44 #------------------------Processed the file with mature mirnas------------------------------- | 
|  | 45 | 
|  | 46 | 
|  | 47 def new_gff(file_mirna): | 
|  | 48 | 
|  | 49     mirna = []   # new list with shifted mirnas | 
|  | 50     positions =int(args.positions)   # positions shifted | 
|  | 51     print(str(positions)+" positions shifted") | 
|  | 52     names=[] | 
|  | 53     # Remove lines which conatain the word "primary" | 
|  | 54     for i in range(len(file_mirna)): | 
|  | 55 | 
|  | 56         if "primary" not in file_mirna[i]: | 
|  | 57             mirna.append(file_mirna[i]) | 
|  | 58 | 
|  | 59             if "chr" in file_mirna[i]: | 
|  | 60                 a=file_mirna[i].split("\t")[0] | 
|  | 61                 b=file_mirna[i].split("\t")[6] | 
|  | 62                 c=file_mirna[i].split("=")[3].split(";")[0] | 
|  | 63                 names.append([a,b,c]) | 
|  | 64 | 
|  | 65     names.sort() | 
|  | 66     sublists=[] | 
|  | 67 | 
|  | 68     [sublists.append([item] * names.count(item)) for item in names if names.count(item)>=2] | 
|  | 69     sublists.sort() | 
|  | 70     sublists=list(sublists for sublists, _ in itertools.groupby(sublists)) | 
|  | 71     unique_names=[[x[0][0],x[0][2]] for x in sublists] | 
|  | 72 | 
|  | 73     for x in unique_names: | 
|  | 74         flag = 0 | 
|  | 75         for i in range(len(mirna)): | 
|  | 76 | 
|  | 77               if "chr" in mirna[i] and mirna[i].split("=")[3].split(";")[0]==x[1] and x[0]==mirna[i].split("\t")[0]: | 
|  | 78                  flag+=1 | 
|  | 79                  ktr=mirna[i].split(";")[0]+";"+mirna[i].split(";")[1]+";"+mirna[i].split(";")[2]+"-{"+str(flag)+"}"+";"+mirna[i].split(";")[3] | 
|  | 80                  mirna[i]=ktr | 
|  | 81 | 
|  | 82 | 
|  | 83     f = open('shifted_mirnas.bed', "w") | 
|  | 84 | 
|  | 85     for i in range(len(mirna)): | 
|  | 86 | 
|  | 87         if "chr" in mirna[i]: | 
|  | 88 | 
|  | 89             # change the name of current mirna | 
|  | 90             mirna_name_1 = mirna[i].split("=")[3] | 
|  | 91             mirna_name_2 = mirna[i].split("=")[4] | 
|  | 92             # mirna_name_2 = mirna_name_2.split(";")[0] | 
|  | 93             mirna_name_1 = mirna_name_1.split(";")[0]+"_"+mirna_name_2+"_"+mirna[i].split("\t")[0] | 
|  | 94             mirna[i] = mirna[i].replace("miRNA", mirna_name_1) | 
|  | 95 | 
|  | 96             # Shift the position of mirna | 
|  | 97             start = mirna[i].split("\t")[3] | 
|  | 98             end = mirna[i].split("\t")[4] | 
|  | 99             shift_start = int(start)-positions          # shift the interval to the left | 
|  | 100             shift_end = int(end)+positions              # shift the interval to the right | 
|  | 101 | 
|  | 102             # Replace the previous intervals with the new | 
|  | 103             mirna[i] = mirna[i].replace(start, str(shift_start)) | 
|  | 104             mirna[i] = mirna[i].replace(end, str(shift_end)) | 
|  | 105 | 
|  | 106             f.write(mirna[i] + "\n") | 
|  | 107 | 
|  | 108     f.close() | 
|  | 109 | 
|  | 110 #=================================================================================================================================== | 
|  | 111 | 
|  | 112 def bedtool(genome): | 
|  | 113     subprocess.call(["bedtools", "getfasta", "-fi", "/cvmfs/data.galaxyproject.org/byhand/"+genome+"/sam_index/"+genome+".fa", "-bed", "shifted_mirnas.bed", "-fo", "new_ref.fa", "-name", "-s"]) | 
|  | 114 | 
|  | 115 #=================================================================================================================================== | 
|  | 116 | 
|  | 117 | 
|  | 118 if __name__=='__main__': | 
|  | 119 | 
|  | 120     starttime = time.time() | 
|  | 121     q = Queue() | 
|  | 122     p1 = Process(target=read_url(q)) | 
|  | 123     p1.start() | 
|  | 124     p1.join() | 
|  | 125 | 
|  | 126     file_mirna=q.get() | 
|  | 127 | 
|  | 128     p2 = [Process(target=write_gff(file_mirna))] | 
|  | 129     p2.extend([Process(target=new_gff(file_mirna))]) | 
|  | 130     [x.start() for x in p2] | 
|  | 131     [x.join() for x in p2] | 
|  | 132 | 
|  | 133     p3 = Process(target=bedtool(args.genome)) | 
|  | 134     p3.start() | 
|  | 135     p3.join() | 
|  | 136 | 
|  | 137     print('That took {} seconds'.format(time.time() - starttime)) | 
|  | 138 |