# HG changeset patch # User glogobyte # Date 1603186866 0 # Node ID 6731958cdc386a0793e79badbb17e063cdcd8e06 # Parent 0aefa515b43691ee90a8acb8f2e73bb6ae258c2f Deleted selected files diff -r 0aefa515b436 -r 6731958cdc38 pre_mirbase.py --- a/pre_mirbase.py Fri Oct 16 18:50:14 2020 +0000 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,138 +0,0 @@ -from itertools import groupby -import sys -import subprocess -import argparse -import time -import urllib -from multiprocessing import Process, Queue -import itertools - -subprocess.call(['mkdir', 'out']) -parser = argparse.ArgumentParser() - -parser.add_argument("-pos", "--positions", help="", action="store") -parser.add_argument("-tool_dir", "--tool_directory", help="tool directory path", action="store") -parser.add_argument("-gen", "--genome", help="tool directory path", action="store") -parser.add_argument("-gff3", "--gff", help="",action="store") - -args = parser.parse_args() - -#======================================================================================================================================= - - -#-----------------------Download and read the file hsa.gff3--------------------------------- - -def read_url(q): - - - url = 'ftp://mirbase.org/pub/mirbase/CURRENT/genomes/'+args.gff - #url = 'ftp://mirbase.org/pub/mirbase/21/genomes/hsa.gff3' - response = urllib.urlopen(url) - data = response.read() - file_mirna = data.decode('utf-8') - file_mirna = file_mirna.split("\n") - q.put(file_mirna) - - -def write_gff(file_mirna): - f = open('original_mirnas.bed', "w") - - for i in range(len(file_mirna)): - f.write(file_mirna[i] + "\n") - - -#------------------------Processed the file with mature mirnas------------------------------- - - -def new_gff(file_mirna): - - mirna = [] # new list with shifted mirnas - positions =int(args.positions) # positions shifted - print(str(positions)+" positions shifted") - names=[] - # Remove lines which conatain the word "primary" - for i in range(len(file_mirna)): - - if "primary" not in file_mirna[i]: - mirna.append(file_mirna[i]) - - if "chr" in file_mirna[i]: - a=file_mirna[i].split("\t")[0] - b=file_mirna[i].split("\t")[6] - c=file_mirna[i].split("=")[3].split(";")[0] - names.append([a,b,c]) - - names.sort() - sublists=[] - - [sublists.append([item] * names.count(item)) for item in names if names.count(item)>=2] - sublists.sort() - sublists=list(sublists for sublists, _ in itertools.groupby(sublists)) - unique_names=[[x[0][0],x[0][2]] for x in sublists] - - for x in unique_names: - flag = 0 - for i in range(len(mirna)): - - if "chr" in mirna[i] and mirna[i].split("=")[3].split(";")[0]==x[1] and x[0]==mirna[i].split("\t")[0]: - flag+=1 - ktr=mirna[i].split(";")[0]+";"+mirna[i].split(";")[1]+";"+mirna[i].split(";")[2]+"-{"+str(flag)+"}"+";"+mirna[i].split(";")[3] - mirna[i]=ktr - - - f = open('shifted_mirnas.bed', "w") - - for i in range(len(mirna)): - - if "chr" in mirna[i]: - - # change the name of current mirna - mirna_name_1 = mirna[i].split("=")[3] - mirna_name_2 = mirna[i].split("=")[4] - # mirna_name_2 = mirna_name_2.split(";")[0] - mirna_name_1 = mirna_name_1.split(";")[0]+"_"+mirna_name_2+"_"+mirna[i].split("\t")[0] - mirna[i] = mirna[i].replace("miRNA", mirna_name_1) - - # Shift the position of mirna - start = mirna[i].split("\t")[3] - end = mirna[i].split("\t")[4] - shift_start = int(start)-positions # shift the interval to the left - shift_end = int(end)+positions # shift the interval to the right - - # Replace the previous intervals with the new - mirna[i] = mirna[i].replace(start, str(shift_start)) - mirna[i] = mirna[i].replace(end, str(shift_end)) - - f.write(mirna[i] + "\n") - - f.close() - -#=================================================================================================================================== - -def bedtool(genome): - subprocess.call(["bedtools", "getfasta", "-fi", "/cvmfs/data.galaxyproject.org/byhand/"+genome+"/sam_index/"+genome+".fa", "-bed", "shifted_mirnas.bed", "-fo", "new_ref.fa", "-name", "-s"]) - -#=================================================================================================================================== - - -if __name__=='__main__': - - starttime = time.time() - q = Queue() - p1 = Process(target=read_url(q)) - p1.start() - p1.join() - - file_mirna=q.get() - - p2 = [Process(target=write_gff(file_mirna))] - p2.extend([Process(target=new_gff(file_mirna))]) - [x.start() for x in p2] - [x.join() for x in p2] - - p3 = Process(target=bedtool(args.genome)) - p3.start() - p3.join() - - print('That took {} seconds'.format(time.time() - starttime)) - diff -r 0aefa515b436 -r 6731958cdc38 pre_mirgene.py --- a/pre_mirgene.py Fri Oct 16 18:50:14 2020 +0000 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,89 +0,0 @@ -import subprocess -import argparse -import time -import urllib -from multiprocessing import Process, Queue - -subprocess.call(['mkdir', 'out']) -parser = argparse.ArgumentParser() -parser.add_argument("-pos", "--positions", help="", action="store") -parser.add_argument("-tool_dir", "--tool_directory", help="tool directory path", action="store") -parser.add_argument("-sym", "--symbol", help="",action="store") - - -args = parser.parse_args() - -#======================================================================================================================================= - -def read_url1(path): - - response = urllib.urlopen(path) - data = response.read() - data1 = data.decode('utf-8') - data1 = data1.split("\n") - del data1[-1] - q.put(data1) - -#=================================================================================================================================== - -def custom_arms_mirgene(mat_mirna,pri_mirna): - mat_ext=[] - for i in range(0,len(mat_mirna),2): - if args.symbol in mat_mirna[i]: - mat_seq=mat_mirna[i+1] - - for j in range(0,len(pri_mirna),2): - if mat_mirna[i].split("_")[0] == pri_mirna[j].split("_")[0]: - temp_ext = pri_mirna[j+1].split(mat_seq) - mat_ext.append([mat_mirna[i],temp_ext[0][-int(args.positions):]+mat_seq+temp_ext[1][0:int(args.positions)]]) - - print(str(args.positions)+" positions shifted") - return(mat_ext) - -#=================================================================================================================================== - -def write_custom_arms(list,name,c): - - f = open(name, "w") - for x in list: - if c==1: - f.write(x[0]+'\n') - f.write(x[1]+'\n') - else: - f.write(x+'\n') - -#================================================================================================================================== - -if __name__=='__main__': - - starttime = time.time() - print(args.symbol) - q = Queue() - - mat_url = 'https://mirgenedb.org/fasta/ALL?mat=1' - p = [Process(target=read_url1(mat_url))] - star_url = 'https://mirgenedb.org/fasta/ALL?star=1' - p.extend([Process(target=read_url1(star_url))]) - pri_url = 'https://mirgenedb.org/static/data/ALL/ALL--pri-30-30.fas' - p.extend([Process(target=read_url1(pri_url))]) - [x.start() for x in p] - [x.join() for x in p] - - mat_mirna=q.get() - star_mirna=q.get() - - mat_mirna.extend(star_mirna) - for i in range(1,len(mat_mirna),2): - mat_mirna[i]=mat_mirna[i].replace("U","T") - pri_mirna=q.get() - - mat_ext=custom_arms_mirgene(mat_mirna,pri_mirna) - p1 = [Process(target=write_custom_arms(mat_mirna,"shifted_mirnas.bed",0))] - p1.extend([Process(target=write_custom_arms(pri_mirna,"original_mirnas.bed",0))]) - p1.extend([Process(target=write_custom_arms(mat_ext,"new_ref.fa",1))]) - [x.start() for x in p1] - [x.join() for x in p1] - - print('That took {} seconds'.format(time.time() - starttime)) - -