Mercurial > repos > glogobyte > armdb

--- a/mirbase.loc.sample	Wed Oct 28 08:14:50 2020 +0000
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,35 +0,0 @@
-# bowtie2_indices.loc.sample
-# This is a *.loc.sample file distributed with Galaxy that enables tools
-# to use a directory of indexed data files. This one is for Bowtie2 and Tophat2.
-# See the wiki: http://wiki.galaxyproject.org/Admin/NGS%20Local%20Setup
-# First create these data files and save them in your own data directory structure.
-# Then, create a bowtie2_indices.loc file to use those indexes with tools.
-# Copy this file, save it with the same name (minus the .sample),
-# follow the format examples, and store the result in this directory.
-# The file should include a one-line entry for each index set.
-# The path points to the "basename" for the set, not a specific file.
-# It has four text columns separated by TABS.
-#
-# <unique_build_id>     <dbkey> <display_name>  <file_base_path>
-#
-# So, for example, if you had hg19 indexes stored in:
-#
-#    /depot/data2/galaxy/hg19/bowtie2/
-#
-# containing hg19 genome and hg19.*.bt2 files, such as:
-#    -rw-rw-r-- 1 james   james   914M Feb 10 18:56 hg19canon.fa
-#    -rw-rw-r-- 1 james   james   914M Feb 10 18:56 hg19canon.1.bt2
-#    -rw-rw-r-- 1 james   james   683M Feb 10 18:56 hg19canon.2.bt2
-#    -rw-rw-r-- 1 james   james   3.3K Feb 10 16:54 hg19canon.3.bt2
-#    -rw-rw-r-- 1 james   james   683M Feb 10 16:54 hg19canon.4.bt2
-#    -rw-rw-r-- 1 james   james   914M Feb 10 20:45 hg19canon.rev.1.bt2
-#    -rw-rw-r-- 1 james   james   683M Feb 10 20:45 hg19canon.rev.2.bt2
-#
-# then the bowtie2_indices.loc entry could look like this:
-#
-#
-hg38	homo sapiens	hsa.gff3	hsa
-#
-#
-#
-#
--- a/mirgene.loc.sample	Wed Oct 28 08:14:50 2020 +0000
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,36 +0,0 @@
-# bowtie2_indices.loc.sample
-# This is a *.loc.sample file distributed with Galaxy that enables tools
-# to use a directory of indexed data files. This one is for Bowtie2 and Tophat2.
-# See the wiki: http://wiki.galaxyproject.org/Admin/NGS%20Local%20Setup
-# First create these data files and save them in your own data directory structure.
-# Then, create a bowtie2_indices.loc file to use those indexes with tools.
-# Copy this file, save it with the same name (minus the .sample),
-# follow the format examples, and store the result in this directory.
-# The file should include a one-line entry for each index set.
-# The path points to the "basename" for the set, not a specific file.
-# It has four text columns separated by TABS.
-#
-# <unique_build_id>     <dbkey> <display_name>  <file_base_path>
-#
-# So, for example, if you had hg19 indexes stored in:
-#
-#    /depot/data2/galaxy/hg19/bowtie2/
-#
-# containing hg19 genome and hg19.*.bt2 files, such as:
-#    -rw-rw-r-- 1 james   james   914M Feb 10 18:56 hg19canon.fa
-#    -rw-rw-r-- 1 james   james   914M Feb 10 18:56 hg19canon.1.bt2
-#    -rw-rw-r-- 1 james   james   683M Feb 10 18:56 hg19canon.2.bt2
-#    -rw-rw-r-- 1 james   james   3.3K Feb 10 16:54 hg19canon.3.bt2
-#    -rw-rw-r-- 1 james   james   683M Feb 10 16:54 hg19canon.4.bt2
-#    -rw-rw-r-- 1 james   james   914M Feb 10 20:45 hg19canon.rev.1.bt2
-#    -rw-rw-r-- 1 james   james   683M Feb 10 20:45 hg19canon.rev.2.bt2
-#
-# then the bowtie2_indices.loc entry could look like this:
-#
-#
-Hsa	Human (Homo sapiens)
-#
-#
-#
-#
-#
--- a/pre_mirbase.py	Wed Oct 28 08:14:50 2020 +0000
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,138 +0,0 @@
-from itertools import groupby
-import sys
-import subprocess
-import argparse
-import time
-import urllib.request
-from multiprocessing import Process, Queue
-import itertools
-
-subprocess.call(['mkdir', 'out'])
-parser = argparse.ArgumentParser()
-
-parser.add_argument("-pos", "--positions", help="", action="store")
-parser.add_argument("-tool_dir", "--tool_directory", help="tool directory path", action="store")
-parser.add_argument("-gen", "--genome", help="tool directory path", action="store")
-parser.add_argument("-gff3", "--gff", help="",action="store")
-
-args = parser.parse_args()
-
-#=======================================================================================================================================
-
-
-#-----------------------Download and read the file hsa.gff3---------------------------------
-
-def read_url(q):
-
-
-    url = 'ftp://mirbase.org/pub/mirbase/CURRENT/genomes/'+args.gff
-    #url = 'ftp://mirbase.org/pub/mirbase/21/genomes/hsa.gff3'
-    response = urllib.request.urlopen(url)
-    data = response.read()
-    file_mirna = data.decode('utf-8')
-    file_mirna = file_mirna.split("\n")
-    q.put(file_mirna)
-
-
-def write_gff(file_mirna):
-    f = open('original_mirnas.bed', "w")
-
-    for i in range(len(file_mirna)):
-        f.write(file_mirna[i] + "\n")
-
-
-#------------------------Process the file with mature mirnas-------------------------------
-
-
-def new_gff(file_mirna):
-
-    mirna = []   # new list with shifted mirnas
-    positions =int(args.positions)   # positions shifted
-    print(str(positions)+" positions shifted")
-    names=[]
-    # Remove lines which contain the word "primary"
-    for i in range(len(file_mirna)):
-
-        if "primary" not in file_mirna[i]:
-            mirna.append(file_mirna[i])
-
-            if "chr" in file_mirna[i]:
-                a=file_mirna[i].split("\t")[0]
-                b=file_mirna[i].split("\t")[6]
-                c=file_mirna[i].split("=")[3].split(";")[0]
-                names.append([a,b,c])
-
-    names.sort()
-    sublists=[]
-
-    [sublists.append([item] * names.count(item)) for item in names if names.count(item)>=2]
-    sublists.sort()
-    sublists=list(sublists for sublists, _ in itertools.groupby(sublists))
-    unique_names=[[x[0][0],x[0][2]] for x in sublists]
-
-    for x in unique_names:
-        flag = 0
-        for i in range(len(mirna)):
-
-              if "chr" in mirna[i] and mirna[i].split("=")[3].split(";")[0]==x[1] and x[0]==mirna[i].split("\t")[0]:
-                 flag+=1
-                 ktr=mirna[i].split(";")[0]+";"+mirna[i].split(";")[1]+";"+mirna[i].split(";")[2]+"-{"+str(flag)+"}"+";"+mirna[i].split(";")[3]
-                 mirna[i]=ktr
-
-
-    f = open('shifted_mirnas.bed', "w")
-
-    for i in range(len(mirna)):
-
-        if "chr" in mirna[i]:
-
-            # change the name of current mirna
-            mirna_name_1 = mirna[i].split("=")[3]
-            mirna_name_2 = mirna[i].split("=")[4]
-            # mirna_name_2 = mirna_name_2.split(";")[0]
-            mirna_name_1 = mirna_name_1.split(";")[0]+"_"+mirna_name_2+"_"+mirna[i].split("\t")[0]
-            mirna[i] = mirna[i].replace("miRNA", mirna_name_1)
-
-            # Shift the position of mirna
-            start = mirna[i].split("\t")[3]
-            end = mirna[i].split("\t")[4]
-            shift_start = int(start)-positions          # shift the interval to the left
-            shift_end = int(end)+positions              # shift the interval to the right
-
-            # Replace the previous intervals with the new
-            mirna[i] = mirna[i].replace(start, str(shift_start))
-            mirna[i] = mirna[i].replace(end, str(shift_end))
-
-            f.write(mirna[i] + "\n")
-
-    f.close()
-
-#===================================================================================================================================
-
-def bedtool(genome):
-    subprocess.call(["bedtools", "getfasta", "-fi", "/cvmfs/data.galaxyproject.org/byhand/"+genome+"/sam_index/"+genome+".fa", "-bed", "shifted_mirnas.bed", "-fo", "new_ref.fa", "-name", "-s"])
-
-#===================================================================================================================================
-
-
-if __name__=='__main__':
-
-    starttime = time.time()
-    q = Queue()
-    p1 = Process(target=read_url(q))
-    p1.start()
-    p1.join()
-
-    file_mirna=q.get()
-
-    p2 = [Process(target=write_gff(file_mirna))]
-    p2.extend([Process(target=new_gff(file_mirna))])
-    [x.start() for x in p2]
-    [x.join() for x in p2]
-
-    p3 = Process(target=bedtool(args.genome))
-    p3.start()
-    p3.join()
-
-    print('That took {} seconds'.format(time.time() - starttime))
-
--- a/pre_mirgene.py	Wed Oct 28 08:14:50 2020 +0000
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,89 +0,0 @@
-import subprocess
-import argparse
-import time
-import urllib.request
-from multiprocessing import Process, Queue
-
-subprocess.call(['mkdir', 'out'])
-parser = argparse.ArgumentParser()
-parser.add_argument("-pos", "--positions", help="", action="store")
-parser.add_argument("-tool_dir", "--tool_directory", help="tool directory path", action="store")
-parser.add_argument("-sym", "--symbol", help="",action="store")
-
-
-args = parser.parse_args()
-
-#=======================================================================================================================================
-
-def read_url1(path):
-
-    response = urllib.request.urlopen(path)
-    data = response.read()
-    data1 = data.decode('utf-8')
-    data1 = data1.split("\n")
-    del data1[-1]
-    q.put(data1)
-
-#===================================================================================================================================
-
-def custom_arms_mirgene(mat_mirna,pri_mirna):
-   mat_ext=[]
-   for i in range(0,len(mat_mirna),2):
-       if args.symbol in mat_mirna[i]:
-          mat_seq=mat_mirna[i+1]
-
-          for j in range(0,len(pri_mirna),2):
-              if mat_mirna[i].split("_")[0] == pri_mirna[j].split("_")[0]:
-                 temp_ext = pri_mirna[j+1].split(mat_seq)
-                 mat_ext.append([mat_mirna[i],temp_ext[0][-int(args.positions):]+mat_seq+temp_ext[1][0:int(args.positions)]])
-
-   print(str(args.positions)+" positions shifted")
-   return(mat_ext)
-
-#===================================================================================================================================
-
-def write_custom_arms(list,name,c):
-
-    f = open(name, "w")
-    for x in list:
-       if c==1:
-          f.write(x[0]+'\n')
-          f.write(x[1]+'\n')
-       else:
-          f.write(x+'\n')
-
-#==================================================================================================================================
-
-if __name__=='__main__':
-
-    starttime = time.time()
-    print(args.symbol)
-    q = Queue()
-
-    mat_url = 'https://mirgenedb.org/fasta/ALL?mat=1'
-    p = [Process(target=read_url1(mat_url))]
-    star_url = 'https://mirgenedb.org/fasta/ALL?star=1'
-    p.extend([Process(target=read_url1(star_url))])
-    pri_url = 'https://mirgenedb.org/static/data/ALL/ALL--pri-30-30.fas'
-    p.extend([Process(target=read_url1(pri_url))])
-    [x.start() for x in p]
-    [x.join() for x in p]
-
-    mat_mirna=q.get()
-    star_mirna=q.get()
-
-    mat_mirna.extend(star_mirna)
-    for i in range(1,len(mat_mirna),2):
-        mat_mirna[i]=mat_mirna[i].replace("U","T")
-    pri_mirna=q.get()
-
-    mat_ext=custom_arms_mirgene(mat_mirna,pri_mirna)
-    p1 = [Process(target=write_custom_arms(mat_mirna,"shifted_mirnas.bed",0))]
-    p1.extend([Process(target=write_custom_arms(pri_mirna,"original_mirnas.bed",0))])
-    p1.extend([Process(target=write_custom_arms(mat_ext,"new_ref.fa",1))])
-    [x.start() for x in p1]
-    [x.join() for x in p1]
-
-    print('That took {} seconds'.format(time.time() - starttime))
-
-