annotate mirbase_ultra_v2.py @ 14:3ad9701c7749 draft

Uploaded
author glogobyte
date Thu, 22 Oct 2020 07:30:47 +0000
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
14
3ad9701c7749 Uploaded
glogobyte
parents:
diff changeset
1 from mirbase_functions import *
3ad9701c7749 Uploaded
glogobyte
parents:
diff changeset
2 from mirbase_graphs import *
3ad9701c7749 Uploaded
glogobyte
parents:
diff changeset
3 import itertools
3ad9701c7749 Uploaded
glogobyte
parents:
diff changeset
4 import time
3ad9701c7749 Uploaded
glogobyte
parents:
diff changeset
5 import sys
3ad9701c7749 Uploaded
glogobyte
parents:
diff changeset
6 import os
3ad9701c7749 Uploaded
glogobyte
parents:
diff changeset
7 import urllib.request
3ad9701c7749 Uploaded
glogobyte
parents:
diff changeset
8 import gzip
3ad9701c7749 Uploaded
glogobyte
parents:
diff changeset
9 from multiprocessing import Process, Queue, Lock, Pool, Manager, Value
3ad9701c7749 Uploaded
glogobyte
parents:
diff changeset
10 import subprocess
3ad9701c7749 Uploaded
glogobyte
parents:
diff changeset
11 import argparse
3ad9701c7749 Uploaded
glogobyte
parents:
diff changeset
12 from collections import OrderedDict
3ad9701c7749 Uploaded
glogobyte
parents:
diff changeset
13 from matplotlib.backends.backend_pdf import PdfPages
3ad9701c7749 Uploaded
glogobyte
parents:
diff changeset
14 import pandas as pd
3ad9701c7749 Uploaded
glogobyte
parents:
diff changeset
15 from math import pi
3ad9701c7749 Uploaded
glogobyte
parents:
diff changeset
16 import numpy as np
3ad9701c7749 Uploaded
glogobyte
parents:
diff changeset
17 import matplotlib.pyplot as plt
3ad9701c7749 Uploaded
glogobyte
parents:
diff changeset
18 from matplotlib.ticker import PercentFormatter
3ad9701c7749 Uploaded
glogobyte
parents:
diff changeset
19 import seaborn as sns
3ad9701c7749 Uploaded
glogobyte
parents:
diff changeset
20 import scipy.stats as stats
3ad9701c7749 Uploaded
glogobyte
parents:
diff changeset
21 from plotnine import *
3ad9701c7749 Uploaded
glogobyte
parents:
diff changeset
22 import math
3ad9701c7749 Uploaded
glogobyte
parents:
diff changeset
23 import re
3ad9701c7749 Uploaded
glogobyte
parents:
diff changeset
24 import matplotlib.ticker as mtick
3ad9701c7749 Uploaded
glogobyte
parents:
diff changeset
25 import copy
3ad9701c7749 Uploaded
glogobyte
parents:
diff changeset
26
3ad9701c7749 Uploaded
glogobyte
parents:
diff changeset
# Create the working directories used by the rest of the script.
# os.makedirs(..., exist_ok=True) behaves like `mkdir -p` but does not depend
# on an external `mkdir` binary and raises OSError if a directory cannot be
# created, instead of silently ignoring a non-zero exit status.
for _work_dir in ('split1', 'split2', 'split3', 'split4', 'split11', 'split12',
                  'Counts', 'Diff/temp_con', 'Diff/temp_tre',
                  'Diff/n_temp_con', 'Diff/n_temp_tre'):
    os.makedirs(_work_dir, exist_ok=True)
3ad9701c7749 Uploaded
glogobyte
parents:
diff changeset
28
3ad9701c7749 Uploaded
glogobyte
parents:
diff changeset
# Command-line interface. Every option stores a single string except
# -con/-tre, which collect one or more values into a list.
# NOTE(review): several help texts below look copy-pasted ("choose the
# database", "tool directory path") and probably do not describe their
# option -- confirm the intended wording with the tool wrapper.
parser = argparse.ArgumentParser()

# Analysis selection and tool location
parser.add_argument("-analysis", "--anal", help="choose type of analysis")
parser.add_argument("-program", "--pro", help="choose type of analysis")
parser.add_argument("-tool_dir", "--tool_directory", help="tool directory path")
parser.add_argument("-gen", "--org_name", help="tool directory path")

# Input files for the two groups
parser.add_argument("-con", "--control", nargs='+', default=[], help="input fastq file")
parser.add_argument("-tre", "--treated", nargs='+', default=[], help="input fastq file")

# Remaining options
parser.add_argument("-f", "--flag", help="choose the database")
parser.add_argument("-umis", "--umi", help="choose the database")
parser.add_argument("-percentage", "--per", help="choose the database")
parser.add_argument("-counts", "--count", help="choose the database")
parser.add_argument("-name1", "--n1", help="choose the database")
parser.add_argument("-name2", "--n2", help="choose the database")

args = parser.parse_args()
3ad9701c7749 Uploaded
glogobyte
parents:
diff changeset
43
3ad9701c7749 Uploaded
glogobyte
parents:
diff changeset
44 #########################################################################################################################################
3ad9701c7749 Uploaded
glogobyte
parents:
diff changeset
45
3ad9701c7749 Uploaded
glogobyte
parents:
diff changeset
def collapse_sam(path):
    """Collapse identical alignments of a SAM file into one record each.

    Alignment records that share the same reference name (3rd field) and the
    same read sequence (10th field) are merged.  One record is kept per
    distinct (reference, sequence) pair, in order of first appearance.  The
    kept record is the last one seen for that pair; its 11th field is
    overwritten with a run of "~" and its 1st field with "<k>-<count>",
    where k is the 1-based rank of the pair and count is the number of
    records that were merged.

    Args:
        path: location of the SAM file, passed to ``read(path, 0)``.

    Returns:
        list: the header records (first field contains "@") followed by the
        collapsed alignment records, each as a list of tab-separated fields.
    """
    # read() is provided by mirbase_functions; presumably it returns the
    # file's lines as a list of strings -- TODO confirm.
    ini_sam = read(path, 0)

    intro_sam = []
    main_sam = []
    for line in ini_sam:
        fields = line.rstrip("\n").split("\t")
        if "@" in fields[0]:
            intro_sam.append(fields)
        else:
            main_sam.append(fields)

    # Single pass instead of rescanning every record for every unique pair:
    # (reference, sequence) -> [number of records, last record seen].
    groups = OrderedDict()
    for record in main_sam:
        key = (record[2], record[9])
        entry = groups.get(key)
        if entry is None:
            groups[key] = [1, record]
        else:
            entry[0] += 1
            entry[1] = record

    new_main_sam = []
    for rank, (count, record) in enumerate(groups.values(), start=1):
        record[10] = "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~"
        record[0] = str(rank) + "-" + str(count)
        new_main_sam.append(record)

    return intro_sam + new_main_sam
3ad9701c7749 Uploaded
glogobyte
parents:
diff changeset
74
3ad9701c7749 Uploaded
glogobyte
parents:
diff changeset
75 #################################################################################################################################################################################################################
3ad9701c7749 Uploaded
glogobyte
parents:
diff changeset
76
3ad9701c7749 Uploaded
glogobyte
parents:
diff changeset
def _chrom_token(ref_name):
    """Extract the chromosome/strand token that follows "chr" in a reference name.

    Returns a signed int (sign taken from the character after "(") when the
    text between "chr" and "(" is all digits, otherwise the first four
    characters after "chr" (e.g. "X(+)").
    """
    after_chr = ref_name.split("chr")[1]
    number = after_chr.split("(")[0]
    if number.isdigit():
        return int(after_chr.split("(")[1][0] + number)
    return after_chr[0:4]


def duplicate_chroms_isoforms(List):
    """Merge records of one read that hit the same miRNA on several chromosomes.

    Records are grouped by (sequence = field 10, read id = field 1, miRNA base
    name = text of field 3 before the first "_").  For every group a merged
    name "<base>_chr<A>(<s>)_chr<B>(<s>)..." is built from the sorted
    chromosome tokens of all reference names in the group.  Each record is
    then renamed according to how many "_"-separated parts its name has:

    * more than 3 parts: the copy gets ``merged + "_" + parts[-2] + "_" +
      parts[-1]``; the original record is left untouched;
    * exactly 3 parts: both the copy and the original get ``merged``;
    * otherwise: nothing is renamed.

    Args:
        List: list of records (lists of strings).  NOTE: it is modified in
            place -- 3-part names are rewritten and the list is sorted --
            exactly as the previous implementation did.

    Returns:
        tuple: ``(new_list, new_double_List)`` -- the sorted, de-duplicated
        original records and the sorted, de-duplicated renamed copies.

    Raises:
        IndexError: if a reference name does not contain "chr".
    """
    signed_int = re.compile(r"[-+]?\d+$")

    # Distinct reference names per (sequence, read id, base name) group.
    names_by_group = {}
    for record in List:
        key = (record[9], record[0], record[2].split("_")[0])
        names_by_group.setdefault(key, set()).add(record[2])

    # Copies are taken before any renaming so they start from the input names.
    double_List = [record[:] for record in List]

    merged_name = {}
    for key, ref_names in names_by_group.items():
        tokens = {_chrom_token(ref) for ref in ref_names}
        # Ints and strings cannot be sorted together.  Previously only X/Y
        # tokens triggered the conversion, so e.g. chr1 + chrM raised
        # TypeError; now any non-numeric token switches the set to strings.
        if any(isinstance(tok, str) for tok in tokens):
            tokens = {str(tok) for tok in tokens}

        label = key[2]
        for tok in sorted(tokens):
            text = str(tok)
            if signed_int.match(text):
                value = int(text)
                strand = "(-)" if value < 0 else "(+)"
                label += "_chr" + str(abs(value)) + strand
            else:
                label += "_chr" + text
        merged_name[key] = label

    for i, copy_rec in enumerate(double_List):
        parts = copy_rec[2].split("_")
        label = merged_name[(copy_rec[9], copy_rec[0], parts[0])]
        if len(parts) > 3:
            copy_rec[2] = label + "_" + parts[-2] + "_" + parts[-1]
        elif len(parts) == 3:
            copy_rec[2] = label
            List[i][2] = label

    List.sort()
    new_list = [record for record, _ in itertools.groupby(List)]

    double_List.sort()
    new_double_List = [record for record, _ in itertools.groupby(double_List)]

    return new_list, new_double_List
3ad9701c7749 Uploaded
glogobyte
parents:
diff changeset
154
3ad9701c7749 Uploaded
glogobyte
parents:
diff changeset
155
3ad9701c7749 Uploaded
glogobyte
parents:
diff changeset
156 #############################################################################################################################################################################################################
3ad9701c7749 Uploaded
glogobyte
parents:
diff changeset
157
3ad9701c7749 Uploaded
glogobyte
parents:
diff changeset
def sam(mature_mirnas,path,name,con,l,samples,data,names,unmap_seq,samples_mirna_names,deseq,LHE_names,umi,ini_sample,unmap_counts):
    """Annotate the mapped reads of one SAM file and publish the results.

    Steps, in order:

    1. Read the file and keep alignment records whose flag (field 2) is '0'
       and whose sequence (field 10) is 18-26 nt long.
    2. For every entry ``i`` of ``mature_mirnas`` whose name (first
       space-separated token minus its first character) equals the base name
       of a record's reference, compare the record's sequence with
       ``mature_mirnas[i+1]`` (U replaced by T) through ``longestSubstring``.
       Records that differ from the mature sequence on the same side on both
       strings are dropped; the others get their 5'/3' offsets appended to
       the reference name as "_<pre>_<post>" (omitted when both are 0).
    3. Merge multi-chromosome hits with ``duplicate_chroms_isoforms`` and
       recompute the per-miRNA sequence and read totals from its output.
    4. While holding ``l``, append the results to the shared containers
       (only when ``con`` is "c" or "t").

    Args:
        mature_mirnas: sequence of strings; presumably alternating FASTA
            header / sequence lines of the mature miRNA file -- TODO confirm.
        path: SAM file location, passed to ``read(path, 0)``.
        name: sample name appended to ``names`` and stored in ``data``.
        con: "c" or "t"; any other value publishes nothing.
        l: lock-like object offering ``acquire()`` and ``release()``.
        samples, data, names, samples_mirna_names, deseq, LHE_names,
        ini_sample: list-like containers that receive the results.
        unmap_seq, unmap_counts: objects with a numeric ``value`` attribute;
            incremented by the number of flag-'4' records and by the sum of
            their read counts respectively.
        umi: unused.

    Returns:
        None.  All results are delivered through the container arguments.
    """
    # read the sam file
    ini_sam = read(path, 0)
    new_main_sam = [x.rstrip("\n").split("\t") for x in ini_sam if "@" not in x.split("\t")[0]]
    unique_seq = [x for x in new_main_sam if x[1] == '0' and 18 <= len(x[9]) <= 26]

    sorted_uni_arms = []

    for i in range(len(mature_mirnas)):
        mirna_name = mature_mirnas[i].split(" ")[0][1:]
        tmp_count_reads = 0  # total number of reads for this miRNA
        tmp_count_seq = 0    # total number of sequences for this miRNA
        kept = []            # records that survive this pass

        for record in unique_seq:
            ref_base = record[2].split("_")[0]
            # Names containing "{" carry a 4-character suffix that is not
            # part of the official miRNA name.
            official = ref_base[:-4] if "{" in ref_base else ref_base
            if mirna_name != official:
                kept.append(record)
                continue

            temp_mature = mature_mirnas[i+1].strip().replace("U", "T")
            off_part = longestSubstring(temp_mature, record[9])

            mat_diff = temp_mature.split(off_part)
            mat_diff = [len(mat_diff[0]), len(mat_diff[1])]

            unique_diff = record[9].split(off_part)
            unique_diff = [len(unique_diff[0]), len(unique_diff[1])]

            # Problem with hsa-miR-8485: when both strings have leftovers on
            # the same side of the common part the read is discarded.
            if (mat_diff[1] != 0 and unique_diff[1] != 0) or (mat_diff[0] != 0 and unique_diff[0] != 0):
                continue

            pre_pos = mat_diff[0] - unique_diff[0]
            post_pos = unique_diff[1] - mat_diff[1]
            tmp_count_reads += int(record[0].split("-")[1])
            tmp_count_seq += 1

            if pre_pos != 0 or post_pos != 0:
                # A zero offset is written as "0", a non-zero one with sign.
                pre_txt = "0" if pre_pos == 0 else '{:+d}'.format(pre_pos)
                post_txt = "0" if post_pos == 0 else '{:+d}'.format(post_pos)
                record[2] = record[2] + "_" + pre_txt + "_" + post_txt

            kept.append(record)

        unique_seq = kept
        if tmp_count_reads != 0 and tmp_count_seq != 0:
            sorted_uni_arms.append([mirna_name, tmp_count_seq, tmp_count_reads])

    sorted_uni_arms = sorted(sorted_uni_arms, key=lambda x: x[1], reverse=True)
    dedup_unique_seq, double_fil_uni_seq = duplicate_chroms_isoforms(unique_seq)

    # Recompute the totals from the merged records.  The ordering chosen
    # above (by the pre-merge sequence count) is kept as is.
    records_by_base = {}
    for record in double_fil_uni_seq:
        records_by_base.setdefault(record[2].split("_")[0], []).append(record)
    for arm in sorted_uni_arms:
        members = records_by_base.get(arm[0], [])
        arm[1] = len(members)
        arm[2] = sum(int(record[0].split("-")[1]) for record in members)

    if con not in ("c", "t"):
        return

    LHE = [z[2] for z in double_fil_uni_seq]

    l.acquire()
    try:
        # try/finally guarantees the lock is released even if an append or a
        # count conversion fails; otherwise the other workers would block.
        for y in double_fil_uni_seq:
            samples_mirna_names.append([y[2], y[9]])
        deseq.append([[x[2], x[0].split('-')[1], x[9]] for x in double_fil_uni_seq])
        LHE_names.extend(LHE)
        unmap_seq.value += sum([1 for x in new_main_sam if x[1] == '4'])
        unmap_counts.value += sum([int(x[0].split("-")[1]) for x in new_main_sam if x[1] == '4'])
        names.append(name)
        samples.append(dedup_unique_seq)
        data.append([con, name, double_fil_uni_seq, sorted_uni_arms])
        ini_sample.append(new_main_sam)
    finally:
        l.release()
3ad9701c7749 Uploaded
glogobyte
parents:
diff changeset
259
3ad9701c7749 Uploaded
glogobyte
parents:
diff changeset
260
3ad9701c7749 Uploaded
glogobyte
parents:
diff changeset
261 ######################################################################################################################################
3ad9701c7749 Uploaded
glogobyte
parents:
diff changeset
262 """
3ad9701c7749 Uploaded
glogobyte
parents:
diff changeset
263
3ad9701c7749 Uploaded
glogobyte
parents:
diff changeset
264 Read a sam file from Bowtie and do the following:
3ad9701c7749 Uploaded
glogobyte
parents:
diff changeset
265
3ad9701c7749 Uploaded
glogobyte
parents:
diff changeset
266 1) Remove reverse stranded mapped reads
3ad9701c7749 Uploaded
glogobyte
parents:
diff changeset
267 2) Remove unmapped reads
3ad9701c7749 Uploaded
glogobyte
parents:
diff changeset
268 3) Remove all sequences with fewer than 11 reads
3ad9701c7749 Uploaded
glogobyte
parents:
diff changeset
269 4) Sort the arms with the most sequences in decreasing order
3ad9701c7749 Uploaded
glogobyte
parents:
diff changeset
270 5) Sort the sequences of every arm with the most reads in decreasing rate
3ad9701c7749 Uploaded
glogobyte
parents:
diff changeset
271 6) Calculate total number of sequences of every arm
3ad9701c7749 Uploaded
glogobyte
parents:
diff changeset
272 7) Calculate total number of reads of sequences of every arm.
3ad9701c7749 Uploaded
glogobyte
parents:
diff changeset
273 8) Store all the information in a txt file
3ad9701c7749 Uploaded
glogobyte
parents:
diff changeset
274
3ad9701c7749 Uploaded
glogobyte
parents:
diff changeset
275 """
3ad9701c7749 Uploaded
glogobyte
parents:
diff changeset
276
3ad9701c7749 Uploaded
glogobyte
parents:
diff changeset
def non_sam(mature_mirnas,path,name,con,l,data,names,n_deseq,n_samples_mirna_names,n_LHE_names):
    """Annotate the unmapped reads of one SAM file that start with a mature miRNA.

    Alignment records with flag (field 2) '4' and a sequence (field 10) of
    18-26 nt are compared with every odd-indexed entry of ``mature_mirnas``
    (U replaced by T).  When the mature sequence is contained in the read and
    the read starts exactly at the mature 5' end (``pre_pos == 0``), a copy
    of the record is stored whose reference name is

        "<miRNA name>__0_<signed 3' offset>_<read tail after the common part>"

    where the miRNA name comes from the preceding entry (``i - 1``).  Exact
    duplicate copies are removed, keeping first-seen order.  While holding
    ``l`` the results are appended to the shared containers (only when
    ``con`` is "c" or "t").

    Args:
        mature_mirnas: sequence of strings; names are read from even indexes
            and sequences from odd indexes -- presumably FASTA header /
            sequence lines, TODO confirm.
        path: SAM file location, passed to ``read(path, 0)``.
        name: sample name appended to ``names`` and stored in ``data``.
        con: "c" or "t"; any other value publishes nothing.
        l: lock-like object offering ``acquire()`` and ``release()``.
        data, names, n_deseq, n_samples_mirna_names, n_LHE_names: list-like
            containers that receive the results.

    Returns:
        None.  All results are delivered through the container arguments.
    """
    ini_sam = read(path, 0)
    new_main_sam = [x.rstrip("\n").split("\t") for x in ini_sam if "@" not in x.split("\t")[0]]
    unique_seq = [x for x in new_main_sam if x[1] == '4' and 18 <= len(x[9]) <= 26]

    uni_seq = []
    # Calculate the shifted positions for every isomir and add them to its name
    sorted_uni_arms = []
    for i in range(1, len(mature_mirnas), 2):
        # Both values are constant for the whole inner loop.
        temp_mature = mature_mirnas[i].strip().replace("U", "T")
        mirna_name = mature_mirnas[i - 1].split(" ")[0][1:]
        tmp_count_reads = 0  # total number of reads for this miRNA
        tmp_count_seq = 0    # total number of sequences for this miRNA

        for record in unique_seq:
            read_seq = record[9]
            if temp_mature not in read_seq:
                continue

            off_part = longestSubstring(temp_mature, read_seq)

            mat_diff = temp_mature.split(off_part)
            mat_diff = [len(mat_diff[0]), len(mat_diff[1])]

            unique_diff = read_seq.split(off_part)
            # More than two pieces means the common part occurs several
            # times in the read; such reads are skipped.
            if len(unique_diff) > 2:
                continue
            unique_diff = [len(unique_diff[0]), len(unique_diff[1])]

            pre_pos = mat_diff[0] - unique_diff[0]
            post_pos = unique_diff[1] - mat_diff[1]

            # Only reads that begin at the mature 5' end are kept.
            if pre_pos != 0:
                continue

            tmp_count_reads += int(record[0].split("-")[1])
            tmp_count_seq += 1

            t_name = record.copy()
            t_name[2] = mirna_name + "__" + str(pre_pos) + "_" + '{:+d}'.format(post_pos) + "_" + str(read_seq[len(off_part):])
            uni_seq.append(t_name)

        if tmp_count_reads != 0 and tmp_count_seq != 0:
            sorted_uni_arms.append([mirna_name, tmp_count_seq, tmp_count_reads])

    sorted_uni_arms = sorted(sorted_uni_arms, key=lambda x: x[1], reverse=True)
    # Drop exact duplicate records while preserving first-seen order.
    unique_seq = list(map(list, OrderedDict.fromkeys(map(tuple, uni_seq))))

    if con not in ("c", "t"):
        return

    named = [x for x in unique_seq if x[2] != "*"]
    LHE = [x[2] for x in named]

    l.acquire()
    try:
        # try/finally guarantees the lock is released even if an append
        # fails; otherwise the other workers would block forever.
        for x in named:
            n_samples_mirna_names.append([x[2], x[9]])
        n_deseq.append([[x[2], x[0].split('-')[1], x[9]] for x in named])
        n_LHE_names.extend(LHE)
        names.append(name)
        data.append([con, name, unique_seq, sorted_uni_arms])
    finally:
        l.release()
3ad9701c7749 Uploaded
glogobyte
parents:
diff changeset
352
3ad9701c7749 Uploaded
glogobyte
parents:
diff changeset
353 #####################################################################################################################################################################################################################
3ad9701c7749 Uploaded
glogobyte
parents:
diff changeset
def deseq2_temp(samples_mirna_names,deseq,con,l):
    """Build one name/sequence/count table from per-sample count lists.

    Parameters
    ----------
    samples_mirna_names : list of [name, sequence]
        Every entry that must appear in the table.  Sorted in place by name.
    deseq : list of lists of [name, count, sequence]
        One inner list per sample.  Each inner list is padded in place with a
        ``"0"`` count for every name it lacks and is then sorted by name.
    con : str
        ``"c"`` sends the table to the module-level queue ``q1``, ``"t"``
        sends it to ``q2``; any other value sends nothing.
    l : lock
        Held (as a context manager) while the table is queued.

    The queued table has one row per entry of the first sample:
    ``[name, sequence, count_sample_0, count_sample_1, ...]``.

    Raises IndexError if ``deseq`` is empty.

    ``q1`` and ``q2`` are not defined in this block; they are expected to
    exist at module level elsewhere in the file.
    """
    # The original key was ``lambda x:[0]`` (a constant list), which made the
    # sort a no-op; sort by the name column as evidently intended.
    samples_mirna_names.sort(key=lambda x: x[0])

    for sample in deseq:
        # Set lookup instead of rescanning the sample for every name.
        present = {entry[0] for entry in sample}
        for mirna in samples_mirna_names:
            if mirna[0] not in present:
                sample.append([mirna[0], "0", mirna[1]])
                present.add(mirna[0])
        sample.sort(key=lambda entry: entry[0])

    # Counts of every sample grouped by name, keeping the in-sample order so
    # that repeated names contribute all of their counts, as before.
    counts_per_sample = []
    for sample in deseq:
        counts_by_name = {}
        for entry in sample:
            counts_by_name.setdefault(entry[0], []).append(entry[1])
        counts_per_sample.append(counts_by_name)

    deseq_final = [[entry[0], entry[2]] for entry in deseq[0]]
    for row in deseq_final:
        for counts_by_name in counts_per_sample:
            row.extend(counts_by_name.get(row[0], ()))

    # ``with`` guarantees the lock is released even if ``put`` raises.
    with l:
        if con=="c":
            q1.put(deseq_final)
        elif con=="t":
            q2.put(deseq_final)
3ad9701c7749 Uploaded
glogobyte
parents:
diff changeset
379
3ad9701c7749 Uploaded
glogobyte
parents:
diff changeset
380
3ad9701c7749 Uploaded
glogobyte
parents:
diff changeset
381 ####################################################################################################################################################################################################################
3ad9701c7749 Uploaded
glogobyte
parents:
diff changeset
382
3ad9701c7749 Uploaded
glogobyte
parents:
diff changeset
def _dedupe_sorted_rows(rows):
    """Sort ``rows`` in place and return them with exact duplicates removed."""
    rows.sort()
    return [row for row, _ in itertools.groupby(rows)]


def _zero_rows_for_missing(candidates, existing, n_counts):
    """Return sorted, zero-count rows for candidates absent from ``existing``.

    New lists are built so the caller's name entries are never extended.
    """
    known = {tuple(entry) for entry in existing}
    missing = sorted({tuple(entry) for entry in candidates} - known)
    return [list(entry) + ["0"] * n_counts for entry in missing]


def _merged_names_by_sequence(rows):
    """Map each sequence that clashes with an earlier row to a joined name.

    A row clashes when its sequence or its name was already seen.  For every
    such sequence the names of all rows carrying it are collected (one per
    prefix before the first "_", keeping the shorter name) and joined with
    "/": the first collected name, then the others in reverse order.
    """
    seen = set()
    shared = {}
    for row in rows:
        if row[1] not in seen and row[0] not in seen:
            seen.add(row[1])
            seen.add(row[0])
        else:
            shared[row[1]] = []

    for row in rows:
        names = shared.get(row[1])
        if names is None:
            continue
        prefix = row[0].split("_")[0]
        for idx, known in enumerate(names):
            if known.split("_")[0] == prefix:
                # Same prefix already recorded: keep whichever is shorter.
                if len(row[0]) < len(known):
                    del names[idx]
                    names.append(row[0])
                break
        else:
            names.append(row[0])

    return {seq: "/".join(names[:1] + names[:0:-1]) for seq, names in shared.items()}


def main_temp(LH2E, LH2E_names, LH8E, LH8E_names,flag,names_con,names_tre,filter_LH8E,filter_LH2E,raw_LH8E,raw_LH2E):
    """Align the two count tables, merge shared names and apply the filter.

    Parameters
    ----------
    LH2E, LH8E : list of [name, sequence, count, ...]
        Count tables (the filter below calls the LH2E columns ``c_*`` and
        the LH8E columns ``t_*``).  Both are sorted in place.
    LH2E_names, LH8E_names : list of [name, sequence]
        Entries present in each table; an entry missing from the other table
        is added there with ``"0"`` in every count column.
    flag, names_con : unused by this function.
    names_tre : list
        Only its length is used, as the column count when ``LH8E`` is empty
        (``names_con`` plays the same role for ``LH2E``).
    filter_LH8E, filter_LH2E, raw_LH8E, raw_LH2E : list-like
        Output containers; results are appended with ``extend``.

    Reads ``args.per`` and ``args.count`` from a module-level ``args`` that
    is defined elsewhere in the file.  ``args.per == -1`` disables the
    filter; otherwise a row is kept when at least ``per`` percent of the
    count columns of either table reach ``args.count``.

    NOTE(review): indentation of the original was lost in this listing; the
    name-merge loop is reconstructed assuming ``break`` sat under one of the
    nested ``if`` statements (both placements give the same result because a
    prefix is stored at most once per sequence) -- confirm against the file.
    """
    LH2E = _dedupe_sorted_rows(LH2E)
    LH8E = _dedupe_sorted_rows(LH8E)

    # Number of count columns per table; assumes every row of a table has the
    # same width.  Falls back to the sample-name lists for an empty table.
    n_con = len(LH2E[0]) - 2 if LH2E else len(names_con)
    n_tre = len(LH8E[0]) - 2 if LH8E else len(names_tre)

    LH8E = LH8E + _zero_rows_for_missing(LH2E_names, LH8E_names, n_tre)
    LH2E = LH2E + _zero_rows_for_missing(LH8E_names, LH2E_names, n_con)

    # Rows are renamed in place, then both tables are re-sorted and deduped.
    merged_names = _merged_names_by_sequence(LH2E)
    for table in (LH2E, LH8E):
        for row in table:
            if row[1] in merged_names:
                row[0] = merged_names[row[1]]

    LH2E = _dedupe_sorted_rows(LH2E)
    LH8E = _dedupe_sorted_rows(LH8E)

    LH8E_new=[]
    LH2E_new=[]

    if int(args.per)!=-1:
        percent=int(args.per)/100
        min_reads=int(args.count)

        # Row 0 widths (computed above) replace the original ``[1]`` lookups,
        # which raised IndexError on a one-row table.
        c_col_filter=round(percent*n_con)
        t_col_filter=round(percent*n_tre)

        for i, c_row in enumerate(LH2E):
            # Rows are paired by position; both tables are expected to hold
            # the same (name, sequence) keys in the same order at this point.
            t_row = LH8E[i]
            c_cols=sum(1 for value in c_row[2:] if int(value)>=min_reads)
            t_cols=sum(1 for value in t_row[2:] if int(value)>=min_reads)

            if c_cols>=c_col_filter or t_cols>=t_col_filter:
                LH8E_new.append(t_row)
                LH2E_new.append(c_row)

    filter_LH2E.extend(LH2E_new)
    filter_LH8E.extend(LH8E_new)
    raw_LH2E.extend(LH2E)
    raw_LH8E.extend(LH8E)
3ad9701c7749 Uploaded
glogobyte
parents:
diff changeset
487
3ad9701c7749 Uploaded
glogobyte
parents:
diff changeset
488 ##################################################################################################################################################################################################################
3ad9701c7749 Uploaded
glogobyte
parents:
diff changeset
489
3ad9701c7749 Uploaded
glogobyte
parents:
diff changeset
def _write_counts_table(path, sample_names, rows):
    """Write a tab-separated counts table (header plus one line per row)."""
    # ``with`` closes the file even if a row fails to serialise.
    with open(path, 'w') as fp:
        fp.write("\t".join(["Name", "Sequence", *sample_names]))
        for row in rows:
            fp.write("\n%s" % "\t".join(row))


def write_main(raw_LH2E, raw_LH8E, fil_LH2E, fil_LH8E, names_con, names_tre, flag):
    """Write the raw (and, when filtering is on, filtered) count tables.

    Parameters
    ----------
    raw_LH2E, fil_LH2E : iterable of rows of str
        Raw and filtered tables written with the ``names_con`` header under
        the ``args.n1`` label.
    raw_LH8E, fil_LH8E : iterable of rows of str
        Raw and filtered tables written with the ``names_tre`` header under
        the ``args.n2`` label.
    names_con, names_tre : iterable of str
        Sample column headers.
    flag : int
        ``1`` writes the "Templated" files, ``2`` the "Non-Templated" files;
        any other value writes nothing.

    Files go to the existing ``Counts/`` directory.  Filtered files are only
    written when ``int(args.per) != -1``.  ``args`` (with ``per``, ``n1`` and
    ``n2``) is a module-level object defined elsewhere in the file.
    """
    if flag == 1:
        kind = 'Templated'
    elif flag == 2:
        kind = 'Non-Templated'
    else:
        return

    # Same write order as before: filtered tables first, then raw tables.
    tables = []
    if int(args.per)!=-1:
        tables.append(('Filtered', fil_LH8E, fil_LH2E))
    tables.append(('Raw', raw_LH8E, raw_LH2E))

    for prefix, tre_rows, con_rows in tables:
        _write_counts_table('Counts/'+prefix+' '+args.n2+' '+kind+' Counts', names_tre, tre_rows)
        _write_counts_table('Counts/'+prefix+' '+args.n1+' '+kind+' Counts', names_con, con_rows)
3ad9701c7749 Uploaded
glogobyte
parents:
diff changeset
580
3ad9701c7749 Uploaded
glogobyte
parents:
diff changeset
581
3ad9701c7749 Uploaded
glogobyte
parents:
diff changeset
582 #########################################################################################################################################
3ad9701c7749 Uploaded
glogobyte
parents:
diff changeset
583
3ad9701c7749 Uploaded
glogobyte
parents:
diff changeset
def ssamples(names,samp,folder,pro):
    """Write one two-column text file per sample column of ``samp``.

    Parameters
    ----------
    names : sequence of str
        Sample names; ``names[k]`` labels count column ``k + 2`` of ``samp``
        and is also the output file stem.
    samp : sequence of rows ``[id, <unused>, count, count, ...]`` of str
        Table whose first column is written next to each count column.
    folder : str
        Path prefix joined to the file name by plain concatenation, so it
        must already end with a path separator.
    pro : unused by this function.

    Each file ``<folder><name>.txt`` holds a ``miRNA id<TAB>name`` header
    followed by one ``id<TAB>count`` line per row.  Nothing is written when
    ``samp`` is empty (the original raised IndexError on ``samp[0]``).
    """
    if not samp:
        return

    for col in range(2,len(samp[0])):
        sample_name = names[col-2]
        # ``with`` closes the file even if a row is malformed.
        with open(folder+sample_name+'.txt','w') as fp:
            fp.write("miRNA id"+"\t"+sample_name+"\n")
            fp.writelines("\t".join([row[0],row[col]])+"\n" for row in samp)
3ad9701c7749 Uploaded
glogobyte
parents:
diff changeset
594
3ad9701c7749 Uploaded
glogobyte
parents:
diff changeset
595 ##################################################################################################################
3ad9701c7749 Uploaded
glogobyte
parents:
diff changeset
596
3ad9701c7749 Uploaded
glogobyte
parents:
diff changeset
def DB_write(con,name,unique_seq,sorted_uni_arms,f):
    """Write a per-sample report listing the entries found for each arm.

    Parameters
    ----------
    con : str
        ``"c"`` or ``"t"``; together with ``f`` it selects the output folder
        (``f == 1``: ``split1/`` or ``split2/``; ``f == 2``: ``split3/`` or
        ``split4/``).
    name : str
        Output file name inside that folder.
    unique_seq : sequence of rows
        ``row[0]`` is ``"<something>-<reads>"``, ``row[2]`` the entry name and
        ``row[9]`` the sequence.
    sorted_uni_arms : sequence of ``[arm_name, sequence_count, total_reads]``
        One report section is written per arm, in the given order.
    f : int
        ``1``: an entry belongs to an arm when the arm name is a substring of
        the part of ``row[2]`` before the first "_"; every arm is written even
        with no entries.  ``2``: the arm name must equal the part of
        ``row[2]`` before the first "__"; arms without entries are skipped.
        Any other value writes nothing.

    Raises
    ------
    ValueError
        If ``f`` is 1 or 2 and ``con`` is neither ``"c"`` nor ``"t"`` (the
        original failed with UnboundLocalError on the unopened file).
    """
    if f not in (1, 2):
        return

    folders = {
        (1, "c"): 'split1/',
        (1, "t"): 'split2/',
        (2, "c"): 'split3/',
        (2, "t"): 'split4/',
    }
    folder = folders.get((f, con))
    if folder is None:
        raise ValueError("con must be 'c' or 't', got %r" % (con,))

    templated = f == 1
    rule = "*********************************************************************************************************\n"

    # ``with`` closes the report even if a row cannot be formatted.
    with open(folder+name, 'w') as fp:
        fp.write("%s\t%-42s\t%s\n\n" % ("Number of Reads","Name of isomir","Sequence"))

        for arm in sorted_uni_arms:
            if templated:
                members = [row for row in unique_seq if arm[0] in row[2].split("_")[0]]
            else:
                members = [row for row in unique_seq if arm[0]==row[2].split("__")[0]]
                if not members:
                    continue

            # Highest read count first; ties keep their input order.
            members.sort(key=lambda row: int(row[0].split('-')[1]), reverse=True)
            fp.write(rule)
            fp.write("%-8s\t%-22s\t%-25s\t%-30s\t%s\n" % ("|",str(arm[0]),"Sequence count = "+str(arm[1]),"Total reads = "+str(arm[2]),"|"))
            fp.write(rule + "\n")
            for row in members:
                fp.write("%-8s\t%-40s\t%s\n" % (row[0].split("-")[1], row[2],row[9]))
            fp.write("\n" + "\n")
3ad9701c7749 Uploaded
glogobyte
parents:
diff changeset
649
3ad9701c7749 Uploaded
glogobyte
parents:
diff changeset
650
3ad9701c7749 Uploaded
glogobyte
parents:
diff changeset
651 ##########################################################################################################################
3ad9701c7749 Uploaded
glogobyte
parents:
diff changeset
652
3ad9701c7749 Uploaded
glogobyte
parents:
diff changeset
def new_mat_seq(pre_unique_seq,mat_mirnas,l):
    """Collect rows whose name contains a three-part name with fewer reads.

    Parameters
    ----------
    pre_unique_seq : sequence of rows
        ``row[0]`` is ``"<something>-<reads>"``, ``row[2]`` the entry name and
        ``row[9]`` the sequence.  Selected rows are modified in place:
        ``">"`` is prepended to ``row[2]``.
    mat_mirnas : list-like
        Receives, for every selected row, the prefixed name followed by the
        sequence.
    l : lock
        Held (as a context manager) while ``mat_mirnas`` is extended.

    A row ``y`` is selected when some row ``x`` whose name splits into exactly
    three "_"-separated parts has its name contained in ``y``'s name and has
    strictly fewer reads than ``y``, and ``y``'s current name is not already
    an element of a previously selected row.
    """
    unique_iso = []
    for x in pre_unique_seq:
        if len(x[2].split("_")) != 3:
            continue
        for y in pre_unique_seq:
            if x[2] in y[2] and int(x[0].split("-")[1])<int(y[0].split("-")[1]):
                # Membership is tested against whole rows, and the name is
                # prefixed before the row is stored, so a selected row is
                # recognised by its ">"-prefixed name on later passes.
                if not any(y[2] in row for row in unique_iso):
                    y[2]=">"+y[2]
                    unique_iso.append(y)

    # ``with`` releases the lock even if appending to the shared list fails.
    with l:
        for row in unique_iso:
            mat_mirnas.extend((row[2], row[9]))
3ad9701c7749 Uploaded
glogobyte
parents:
diff changeset
668
3ad9701c7749 Uploaded
glogobyte
parents:
diff changeset
669 #########################################################################################################################
3ad9701c7749 Uploaded
glogobyte
parents:
diff changeset
670 def pie_non_temp(merge_LH2E,merge_non_LH2E,merge_LH8E,merge_non_LH8E,c_unmap,t_unmap,c_unmap_counts,t_unmap_counts):
3ad9701c7749 Uploaded
glogobyte
parents:
diff changeset
671
3ad9701c7749 Uploaded
glogobyte
parents:
diff changeset
672 c_samples=[[x[0],x[1],sum(int(i) for i in x[2:])] for x in merge_LH2E]
3ad9701c7749 Uploaded
glogobyte
parents:
diff changeset
673 t_samples=[[x[0],x[1],sum(int(i) for i in x[2:])] for x in merge_LH8E]
3ad9701c7749 Uploaded
glogobyte
parents:
diff changeset
674 c_non_samples=[[x[0],x[1],sum(int(i) for i in x[2:])] for x in merge_non_LH2E]
3ad9701c7749 Uploaded
glogobyte
parents:
diff changeset
675 t_non_samples=[[x[0],x[1],sum(int(i) for i in x[2:])] for x in merge_non_LH8E]
3ad9701c7749 Uploaded
glogobyte
parents:
diff changeset
676
3ad9701c7749 Uploaded
glogobyte
parents:
diff changeset
677 c_templ = 0
3ad9701c7749 Uploaded
glogobyte
parents:
diff changeset
678 c_tem_counts = 0
3ad9701c7749 Uploaded
glogobyte
parents:
diff changeset
679 c_mature = 0
3ad9701c7749 Uploaded
glogobyte
parents:
diff changeset
680 c_mat_counts = 0
3ad9701c7749 Uploaded
glogobyte
parents:
diff changeset
681 t_templ = 0
3ad9701c7749 Uploaded
glogobyte
parents:
diff changeset
682 t_tem_counts = 0
3ad9701c7749 Uploaded
glogobyte
parents:
diff changeset
683 t_mature = 0
3ad9701c7749 Uploaded
glogobyte
parents:
diff changeset
684 t_mat_counts = 0
3ad9701c7749 Uploaded
glogobyte
parents:
diff changeset
685
3ad9701c7749 Uploaded
glogobyte
parents:
diff changeset
686 c_non = len(c_non_samples)
3ad9701c7749 Uploaded
glogobyte
parents:
diff changeset
687 c_non_counts = sum(x[2] for x in c_non_samples)
3ad9701c7749 Uploaded
glogobyte
parents:
diff changeset
688 t_non = len(t_non_samples)
3ad9701c7749 Uploaded
glogobyte
parents:
diff changeset
689 t_non_counts = sum(x[2] for x in t_non_samples)
3ad9701c7749 Uploaded
glogobyte
parents:
diff changeset
690
3ad9701c7749 Uploaded
glogobyte
parents:
diff changeset
691 c_unmap = c_unmap - c_non
3ad9701c7749 Uploaded
glogobyte
parents:
diff changeset
692 t_unmap = c_unmap - t_non
3ad9701c7749 Uploaded
glogobyte
parents:
diff changeset
693
3ad9701c7749 Uploaded
glogobyte
parents:
diff changeset
694 c_unmap_counts=c_unmap_counts - c_non_counts
3ad9701c7749 Uploaded
glogobyte
parents:
diff changeset
695 t_unmap_counts=t_unmap_counts - t_non_counts
3ad9701c7749 Uploaded
glogobyte
parents:
diff changeset
696
3ad9701c7749 Uploaded
glogobyte
parents:
diff changeset
697
3ad9701c7749 Uploaded
glogobyte
parents:
diff changeset
698 for x in c_samples:
3ad9701c7749 Uploaded
glogobyte
parents:
diff changeset
699
3ad9701c7749 Uploaded
glogobyte
parents:
diff changeset
700 if "/" not in x[0]:
3ad9701c7749 Uploaded
glogobyte
parents:
diff changeset
701 if "chr" in x[0].split("_")[-1]:
3ad9701c7749 Uploaded
glogobyte
parents:
diff changeset
702 c_mature+=1
3ad9701c7749 Uploaded
glogobyte
parents:
diff changeset
703 c_mat_counts += x[2]
3ad9701c7749 Uploaded
glogobyte
parents:
diff changeset
704 else:
3ad9701c7749 Uploaded
glogobyte
parents:
diff changeset
705 c_templ+=1
3ad9701c7749 Uploaded
glogobyte
parents:
diff changeset
706 c_tem_counts += x[2]
3ad9701c7749 Uploaded
glogobyte
parents:
diff changeset
707 else:
3ad9701c7749 Uploaded
glogobyte
parents:
diff changeset
708 f=0
3ad9701c7749 Uploaded
glogobyte
parents:
diff changeset
709 for y in x[0].split("/"):
3ad9701c7749 Uploaded
glogobyte
parents:
diff changeset
710 if "chr" in y.split("_")[-1]:
3ad9701c7749 Uploaded
glogobyte
parents:
diff changeset
711 c_mature+=1
3ad9701c7749 Uploaded
glogobyte
parents:
diff changeset
712 c_mat_counts += x[2]
3ad9701c7749 Uploaded
glogobyte
parents:
diff changeset
713 f=1
3ad9701c7749 Uploaded
glogobyte
parents:
diff changeset
714 break
3ad9701c7749 Uploaded
glogobyte
parents:
diff changeset
715 if f==0:
3ad9701c7749 Uploaded
glogobyte
parents:
diff changeset
716 c_templ+=1
3ad9701c7749 Uploaded
glogobyte
parents:
diff changeset
717 c_tem_counts += x[2]
3ad9701c7749 Uploaded
glogobyte
parents:
diff changeset
718
3ad9701c7749 Uploaded
glogobyte
parents:
diff changeset
719 for x in t_samples:
3ad9701c7749 Uploaded
glogobyte
parents:
diff changeset
720
3ad9701c7749 Uploaded
glogobyte
parents:
diff changeset
721 if "/" not in x[0]:
3ad9701c7749 Uploaded
glogobyte
parents:
diff changeset
722 if "chr" in x[0].split("_")[-1]:
3ad9701c7749 Uploaded
glogobyte
parents:
diff changeset
723 t_mature+=1
3ad9701c7749 Uploaded
glogobyte
parents:
diff changeset
724 t_mat_counts += x[2]
3ad9701c7749 Uploaded
glogobyte
parents:
diff changeset
725 else:
3ad9701c7749 Uploaded
glogobyte
parents:
diff changeset
726 t_templ+=1
3ad9701c7749 Uploaded
glogobyte
parents:
diff changeset
727 t_tem_counts += x[2]
3ad9701c7749 Uploaded
glogobyte
parents:
diff changeset
728 else:
3ad9701c7749 Uploaded
glogobyte
parents:
diff changeset
729 f=0
3ad9701c7749 Uploaded
glogobyte
parents:
diff changeset
730 for y in x[0].split("/"):
3ad9701c7749 Uploaded
glogobyte
parents:
diff changeset
731 if "chr" in y.split("_")[-1]:
3ad9701c7749 Uploaded
glogobyte
parents:
diff changeset
732 t_mature+=1
3ad9701c7749 Uploaded
glogobyte
parents:
diff changeset
733 t_mat_counts += x[2]
3ad9701c7749 Uploaded
glogobyte
parents:
diff changeset
734 f=1
3ad9701c7749 Uploaded
glogobyte
parents:
diff changeset
735 break
3ad9701c7749 Uploaded
glogobyte
parents:
diff changeset
736 if f==0:
3ad9701c7749 Uploaded
glogobyte
parents:
diff changeset
737 t_templ+=1
3ad9701c7749 Uploaded
glogobyte
parents:
diff changeset
738 t_tem_counts += x[2]
3ad9701c7749 Uploaded
glogobyte
parents:
diff changeset
739
3ad9701c7749 Uploaded
glogobyte
parents:
diff changeset
740 fig = plt.figure(figsize=(7,5))
3ad9701c7749 Uploaded
glogobyte
parents:
diff changeset
741 labels = 'miRNA RefSeq','Template', 'Unmapped','Non-template'
3ad9701c7749 Uploaded
glogobyte
parents:
diff changeset
742 sizes = [c_mat_counts, c_tem_counts, c_unmap_counts,c_non_counts]
3ad9701c7749 Uploaded
glogobyte
parents:
diff changeset
743 colors = ['gold', 'yellowgreen', 'lightcoral', 'lightskyblue']
3ad9701c7749 Uploaded
glogobyte
parents:
diff changeset
744 ax1 = plt.subplot2grid((1,2),(0,0))
3ad9701c7749 Uploaded
glogobyte
parents:
diff changeset
745 patches, texts, autotexts=plt.pie(sizes, labels=labels, colors=colors, startangle=140,autopct='%1.1f%%',radius=0.8)
3ad9701c7749 Uploaded
glogobyte
parents:
diff changeset
746 [x.set_fontsize(8) for x in texts]
3ad9701c7749 Uploaded
glogobyte
parents:
diff changeset
747 plt.title('Control Group (reads)',fontsize=12)
3ad9701c7749 Uploaded
glogobyte
parents:
diff changeset
748 labels = 'miRNA RefSeq','Template', 'Unmapped','non-template'
3ad9701c7749 Uploaded
glogobyte
parents:
diff changeset
749 sizes = [t_mat_counts, t_tem_counts, t_unmap_counts, t_non_counts]
3ad9701c7749 Uploaded
glogobyte
parents:
diff changeset
750 colors = ['gold', 'yellowgreen', 'lightcoral', 'lightskyblue']
3ad9701c7749 Uploaded
glogobyte
parents:
diff changeset
751 ax2 = plt.subplot2grid((1,2),(0,1))
3ad9701c7749 Uploaded
glogobyte
parents:
diff changeset
752 patches, texts, autotexts=plt.pie(sizes, labels=labels, colors=colors, startangle=140,autopct='%1.1f%%',radius=0.8)
3ad9701c7749 Uploaded
glogobyte
parents:
diff changeset
753 [x.set_fontsize(8) for x in texts]
3ad9701c7749 Uploaded
glogobyte
parents:
diff changeset
754 plt.title('Treated Group (reads)', fontsize=12)
3ad9701c7749 Uploaded
glogobyte
parents:
diff changeset
755 plt.savefig('pie_non.png',dpi=300)
3ad9701c7749 Uploaded
glogobyte
parents:
diff changeset
756
3ad9701c7749 Uploaded
glogobyte
parents:
diff changeset
757 ######################################################################################################################################################
3ad9701c7749 Uploaded
glogobyte
parents:
diff changeset
758
3ad9701c7749 Uploaded
glogobyte
parents:
diff changeset
def merging_names(LH2E_copy,new):
    """Give rows that share a key one merged name, de-duplicate, append to ``new``.

    Every row is a list whose first item is a name and whose second item is a
    key (presumably the read sequence -- confirm against the caller); any
    further items are carried along untouched.  Both items must be hashable
    (strings in practice).

    A key is treated as duplicated when a row is met whose key or name has
    already been seen on an earlier row.  For each duplicated key the names of
    all rows carrying it are collected, keeping at most one name per prefix
    (the text before the first ``"_"``): when two names share a prefix the
    strictly shorter one wins.  The surviving names are joined with ``"/"``
    and written back into every row carrying that key.

    Side effects: the rows of ``LH2E_copy`` are renamed in place,
    ``LH2E_copy`` itself is sorted in place, and the sorted rows with adjacent
    duplicates removed are appended to ``new``.  Nothing is returned.
    """
    # Pass 1: find the duplicated keys.  Names and keys deliberately share one
    # "seen" pool, exactly as the original single list did.
    seen = set()
    duplicated = set()
    for row in LH2E_copy:
        name, key = row[0], row[1]
        if key not in seen and name not in seen:
            seen.add(key)
            seen.add(name)
        else:
            duplicated.add(key)

    # Pass 2: per duplicated key, keep one name per prefix.  The dict keeps
    # insertion order; a replaced name is re-inserted so it moves to the end,
    # matching the original delete-then-append behaviour.
    names_by_key = {key: {} for key in duplicated}
    for row in LH2E_copy:
        by_prefix = names_by_key.get(row[1])
        if by_prefix is None:
            continue
        name = row[0]
        prefix = name.split("_")[0]
        current = by_prefix.get(prefix)
        if current is None:
            by_prefix[prefix] = name
        elif len(name) < len(current):
            del by_prefix[prefix]
            by_prefix[prefix] = name

    # Pass 3: build the merged label -- first name, then the remaining names
    # from last to first (the order the original back-to-front loop produced).
    merged = {}
    for key, by_prefix in names_by_key.items():
        names = list(by_prefix.values())
        merged[key] = "/".join([names[0]] + names[:0:-1])

    for row in LH2E_copy:
        if row[1] in merged:
            row[0] = merged[row[1]]

    # Sorting makes identical rows adjacent so groupby can collapse them.
    LH2E_copy.sort()
    unique_rows = [row for row, _ in itertools.groupby(LH2E_copy)]

    new.extend(unique_rows)
3ad9701c7749 Uploaded
glogobyte
parents:
diff changeset
811
3ad9701c7749 Uploaded
glogobyte
parents:
diff changeset
812
3ad9701c7749 Uploaded
glogobyte
parents:
diff changeset
813 ######################################################################################################################################################
3ad9701c7749 Uploaded
glogobyte
parents:
diff changeset
def pie_temp(merge_LH2E,c_unmap,c_unmap_counts,merge_LH8E,t_unmap,t_unmap_counts):
    """Save ``pie_tem.png``: read composition pies for the control and treated groups.

    Parameters
    ----------
    merge_LH2E, merge_LH8E : rows for the control / treated group.  Item 0 is
        the name (possibly several aliases joined by ``"/"``); items 2 onward
        are per-sample counts convertible with ``int``.
    c_unmap_counts, t_unmap_counts : number of unmapped reads shown as the
        third slice of each pie.
    c_unmap, t_unmap : accepted for interface compatibility; not used here.

    Each pie has three slices -- 'miRNA RefSeq', 'Template', 'Unmapped' --
    sized by read totals.  The figure is written to ``pie_tem.png`` at 300 dpi
    in the current working directory; nothing is returned.
    """
    # Tally both groups before any figure is created, so malformed input fails
    # before matplotlib state is touched.
    c_mat_counts, c_tem_counts = _mature_and_template_reads(merge_LH2E)
    t_mat_counts, t_tem_counts = _mature_and_template_reads(merge_LH8E)

    labels = 'miRNA RefSeq','Template', 'Unmapped'
    colors = ['gold', 'yellowgreen', 'lightskyblue']
    panels = (
        ('Control group (reads)', [c_mat_counts, c_tem_counts, c_unmap_counts]),
        ('Treated group (reads)', [t_mat_counts, t_tem_counts, t_unmap_counts]),
    )

    plt.figure()
    for column, (title, sizes) in enumerate(panels):
        # One row, two columns: control on the left, treated on the right.
        plt.subplot2grid((1,2),(0,column))
        _, texts, _ = plt.pie(sizes, labels=labels, colors=colors, startangle=140,autopct='%1.1f%%',radius=0.8)
        for text in texts:
            text.set_fontsize(8)
        plt.title(title, fontsize=12)
    plt.savefig('pie_tem.png',dpi=300)


def _mature_and_template_reads(merged_rows):
    """Return ``(mature_reads, template_reads)`` summed over ``merged_rows``.

    A row counts as mature (miRNA RefSeq) when any of its ``"/"``-separated
    aliases has ``"chr"`` in its last ``"_"``-separated field; every other row
    counts as template.  A row's reads are the integer sum of items 2 onward.
    """
    mature_reads = 0
    template_reads = 0
    for row in merged_rows:
        reads = sum(int(count) for count in row[2:])
        if any("chr" in alias.split("_")[-1] for alias in row[0].split("/")):
            mature_reads += reads
        else:
            template_reads += reads
    return mature_reads, template_reads
3ad9701c7749 Uploaded
glogobyte
parents:
diff changeset
889
3ad9701c7749 Uploaded
glogobyte
parents:
diff changeset
890 ###################################################################################################################################################################################################################
3ad9701c7749 Uploaded
glogobyte
parents:
diff changeset
891
3ad9701c7749 Uploaded
glogobyte
parents:
diff changeset
def make_spider(merge_LH2E,merge_LH8E):
    """Draw the two isomiR radar charts for the control and treated groups.

    Parameters
    ----------
    merge_LH2E, merge_LH8E : rows for the control / treated group.  Item 0 is
        the name (possibly several aliases joined by ``"/"``); items 2 onward
        are per-sample counts convertible with ``int``.

    Every row is put into one of five categories (see ``_isomir_tallies``).
    The category shares are expressed as percentages, rounded to two decimals,
    once by number of entries and once by number of reads, and each table is
    handed to ``spider_last`` (flag 1 for entries, flag 2 for reads).

    A group with no rows or with zero reads yields 0.0 for every category
    instead of raising ``ZeroDivisionError``.
    """
    c_entries, c_reads = _isomir_tallies(merge_LH2E)
    t_entries, t_reads = _isomir_tallies(merge_LH8E)

    charts = (
        (1, _to_percentages(c_entries), _to_percentages(t_entries)),
        (2, _to_percentages(c_reads), _to_percentages(t_reads)),
    )
    frames = []
    for flag, control, treated in charts:
        # Column order fixes the order of the radar axes.
        frame = pd.DataFrame({
            'group':['Controls','Treated'],
            """5' and 3' isomiRs""":[control["both"],treated["both"]],
            """3' isomiRs""":[control["three"],treated["three"]],
            'miRNA RefSeq':[control["mature"],treated["mature"]],
            """5' isomiRs""":[control["five"],treated["five"]],
            'Others*':[control["other"],treated["other"]]})
        radar_max = max(list(control.values()) + list(treated.values()))
        frames.append((frame, radar_max, flag))

    # Both tables are built before anything is drawn.
    for frame, radar_max, flag in frames:
        spider_last(frame,radar_max,flag)


def _isomir_tallies(merged_rows):
    """Count entries and reads per isomiR category for one group.

    Returns two dicts keyed by ``"five"``, ``"three"``, ``"both"``,
    ``"mature"`` and ``"other"``: the number of entries and the number of
    reads.  A row's reads are the integer sum of items 2 onward.

    Classification of a row's name:
    * any ``"/"``-separated alias has ``"chr"`` in its last ``"_"`` field
      -> ``"mature"``;
    * otherwise, several aliases -> ``"other"``;
    * otherwise the last two ``"_"`` fields are read as integers: last == 0
      -> ``"five"``, second-to-last == 0 -> ``"three"``, else ``"both"``.
    """
    entries = {"five": 0, "three": 0, "both": 0, "mature": 0, "other": 0}
    reads = dict(entries)
    for row in merged_rows:
        row_reads = sum(int(count) for count in row[2:])
        aliases = row[0].split("/")
        weight = 1
        if any("chr" in alias.split("_")[-1] for alias in aliases):
            category = "mature"
        elif len(aliases) > 1:
            category = "other"
            # NOTE(review): the original adds the entry and its reads once per
            # alias here, so a row with k aliases weighs k times.  Kept as is;
            # confirm whether that weighting is intended.
            weight = len(aliases)
        else:
            fields = row[0].split("_")
            if int(fields[-1]) == 0:
                category = "five"
            elif int(fields[-2]) == 0:
                category = "three"
            else:
                category = "both"
        entries[category] += weight
        reads[category] += weight * row_reads
    return entries, reads


def _to_percentages(tally):
    """Return ``tally`` as percentages of its total, rounded to two decimals.

    An all-zero tally maps to 0.0 everywhere rather than dividing by zero.
    """
    total = sum(tally.values())
    if not total:
        return {category: 0.0 for category in tally}
    return {category: round(value/total*100,2) for category, value in tally.items()}
3ad9701c7749 Uploaded
glogobyte
parents:
diff changeset
1031
3ad9701c7749 Uploaded
glogobyte
parents:
diff changeset
1032
3ad9701c7749 Uploaded
glogobyte
parents:
diff changeset
def spider_last(df,radar_max,flag):
    """Render one radar (spider) chart for the two groups and save it as a PNG.

    Parameters
    ----------
    df : pandas DataFrame with a ``'group'`` column followed by one column per
        radar axis; row 0 is plotted as "Controls", row 1 as "Treated".
    radar_max : largest value present in ``df``; used to scale the radial axis.
    flag : ``1`` saves to ``spider_non_red.png``; any other value saves to
        ``spider_red.png``.

    The image is written at 300 dpi to the current working directory; nothing
    is returned.
    """
    # Local import: the visible module header only does ``from math import pi``,
    # so relying on a global ``math`` would depend on a star-import leaking it.
    import math

    # ------- PART 1: Create background
    plt.figure()
    categories=list(df)[1:]
    axis_count = len(categories)

    # One evenly spaced angle per axis; repeat the first to close the polygon.
    angles = [index / float(axis_count) * 2 * math.pi for index in range(axis_count)]
    angles += angles[:1]

    ax = plt.subplot(111, polar=True)

    # Put the first axis at the top and go clockwise.
    ax.set_theta_offset(math.pi/2)
    ax.set_theta_direction(-1)

    plt.xticks(angles[:-1], categories, fontsize=11)

    # Radial scale: add 10% headroom, then round up to the next multiple of the
    # leading power of ten.
    radar_max=round(radar_max+radar_max*0.1)
    mul=len(str(radar_max))-1
    maxi=int(math.ceil(radar_max / pow(10,mul))) * pow(10,mul)
    sep = round(maxi/4)
    ticks = [step*sep for step in range(1,6)]
    plt.yticks(ticks, [str(tick)+'%' for tick in ticks], color="grey", size=10)
    plt.ylim(0, maxi)

    # ------- PART 2: Add plots
    for row_index, label, fill_color in ((0, "Controls", 'b'), (1, "Treated", 'r')):
        values=df.loc[row_index].drop('group').values.flatten().tolist()
        values += values[:1]
        # marker= replaces the '-o' format string: combining that string with
        # linestyle= makes matplotlib warn about a redundant definition.
        ax.plot(angles, values, marker='o', linewidth=1, linestyle='solid', label=label)
        ax.fill(angles, values, fill_color, alpha=0.1)

    # The legend is the same for both outputs; only the file name differs.
    plt.legend(loc='upper right', bbox_to_anchor=(0.0, 0.1))
    if flag==1:
        plt.savefig('spider_non_red.png',dpi=300)
    else:
        plt.savefig('spider_red.png',dpi=300)
3ad9701c7749 Uploaded
glogobyte
parents:
diff changeset
1086
3ad9701c7749 Uploaded
glogobyte
parents:
diff changeset
1087
3ad9701c7749 Uploaded
glogobyte
parents:
diff changeset
1088 #############################################################################################################################################################################################################
3ad9701c7749 Uploaded
glogobyte
parents:
diff changeset
def hist_red(samples, flag):
    """Plot the read-length distribution (redundant reads) of one group.

    Every record is expected to be an indexable row where ``rec[9]`` is the
    read sequence, ``rec[1]`` is the alignment flag as a string (``"0"`` is
    labelled Mapped, ``"4"`` Unmapped, anything else only contributes its
    length to the x axis) and ``rec[0]`` has the form ``<prefix>-<count>``,
    the part after the first ``-`` being the read count.

    Identical (sequence, count, status) triples are counted once across all
    samples.  Percentages are relative to the total count of those unique
    triples; only lengths up to 35 nt are drawn.

    Parameters:
        samples: iterable of samples, each an iterable of records.
        flag: ``"c"`` (control) or ``"t"`` (treated); selects the title and
            the output file (``c_hist_red.png`` / ``t_hist_red.png``).

    Raises:
        ValueError: unknown ``flag``, no read of length <= 35, or a zero
            total read count.
    """
    titles = {
        "c": "Length Distribution of Control group (Redudant reads)",
        "t": "Length Distribution of Treated group (Redudant reads)",
    }
    out_files = {"c": 'c_hist_red.png', "t": 't_hist_red.png'}
    if flag not in titles:
        # Previously this surfaced as an unbound `title` only after all the
        # counting work had been done.
        raise ValueError("flag must be 'c' or 't', got %r" % (flag,))
    title = titles[flag]

    max_length = 35
    status_of = {"0": "Mapped", "4": "Unmapped"}

    seen_lengths = set()
    unique_reads = set()
    for sample in samples:
        for rec in sample:
            seen_lengths.add(len(rec[9]))
            status = status_of.get(rec[1])
            if status is not None:
                unique_reads.add((rec[9], rec[0].split("-")[1], status))

    # Sorted on purpose: the tick range below uses the first and last
    # entries, and set iteration order is not guaranteed to be ascending.
    length = sorted(n for n in seen_lengths if n <= max_length)
    if not length:
        raise ValueError("no reads of length <= %d to plot" % max_length)

    # Single pass over the unique reads instead of one pass per length.
    total_reads = 0
    mapped_by_len = {}
    unmapped_by_len = {}
    for sequence, count, status in unique_reads:
        count = int(count)
        total_reads += count
        bucket = mapped_by_len if status == "Mapped" else unmapped_by_len
        bucket[len(sequence)] = bucket.get(len(sequence), 0) + count

    if total_reads == 0:
        raise ValueError("total read count is zero; nothing to plot")

    map_reads = [round(mapped_by_len.get(n, 0) / total_reads * 100, 2) for n in length]
    unmap_reads = [round(unmapped_by_len.get(n, 0) / total_reads * 100, 2) for n in length]

    # Leave 20% head room above the tallest stacked bar.
    ylim = max([sum(pair) for pair in zip(unmap_reads, map_reads)])
    ylim = ylim + ylim * 20 / 100

    fig, ax = plt.subplots()
    width = 0.8
    ax.bar(length, unmap_reads, width, label='Unmapped')
    ax.bar(length, map_reads, width, bottom=unmap_reads, label='Mapped')
    plt.xticks(np.arange(length[0], length[-1] + 1, 1))
    plt.xlabel('Length (nt)', fontsize=14)
    plt.ylabel('Percentage', fontsize=14)
    plt.title(title, fontsize=14)
    ax.legend()
    plt.ylim([0, ylim])
    ax.grid(axis='y', linewidth=0.2)

    plt.savefig(out_files[flag], dpi=300)
3ad9701c7749 Uploaded
glogobyte
parents:
diff changeset
1160
3ad9701c7749 Uploaded
glogobyte
parents:
diff changeset
1161 #################################################################################################################
3ad9701c7749 Uploaded
glogobyte
parents:
diff changeset
1162
3ad9701c7749 Uploaded
glogobyte
parents:
diff changeset
1163
3ad9701c7749 Uploaded
glogobyte
parents:
diff changeset
def logo_seq_red(merge, flag):
    """Draw the bar chart and sequence logo of trailing nucleotides (redundant).

    Each row of ``merge`` holds a name in ``row[0]`` and per-sample counts
    from ``row[2]`` onwards; the counts are summed into the row's weight.
    The text after the last ``_`` of the name is taken as the nucleotide
    tail.  When the name lists several alternatives separated by ``/``, the
    shortest tail is used (ties broken alphabetically).  The first five tail
    positions are tallied per base, weighted by the row's count, and
    expressed as a percentage of the summed weight of all rows.  Any
    character other than A, C or G is counted as T.

    Parameters:
        merge: iterable of rows as described above.
        flag: ``"c"`` (control) or ``"t"`` (treated); selects the title and
            the output files (``c_bar.png``/``c_logo.png`` or
            ``t_bar.png``/``t_logo.png``).

    Raises:
        ValueError: unknown ``flag`` or a zero total count.
    """
    if flag == "c":
        titlos = "Control group (Redundant)"
        file_logo = "c_logo.png"
        file_bar = "c_bar.png"
    elif flag == "t":
        titlos = "Treated group (Redundant)"
        file_logo = "t_logo.png"
        file_bar = "t_bar.png"
    else:
        raise ValueError("flag must be 'c' or 't', got %r" % (flag,))

    positions = 5
    tally = {'A': [0] * positions, 'C': [0] * positions,
             'G': [0] * positions, 'T': [0] * positions}
    total_reads = 0

    for row in merge:
        weight = sum(int(count) for count in row[2:])
        # A name without "/" yields a single candidate, so one code path
        # serves both the single- and the multi-name case.
        tails = [name.split("_")[-1] for name in row[0].split("/")]
        tail = min(tails, key=lambda t: (len(t), t))
        total_reads += weight
        for pos, base in enumerate(tail[:positions]):
            tally[base if base in ("A", "C", "G") else "T"][pos] += weight

    if total_reads == 0:
        raise ValueError("total read count is zero; nothing to plot")

    # Round the percentage itself; rounding before the division (as the
    # code used to do) left the values unrounded.
    data = {base: [round(value * 100 / total_reads, 1) for value in tally[base]]
            for base in ('A', 'C', 'G', 'T')}

    df = pd.DataFrame(data, index=[1, 2, 3, 4, 5])
    bar_ax = df.plot.bar(color=("g", "b", "gold", "r"))
    bar_ax.grid(axis='y', linewidth=0.2)
    plt.xticks(rotation=0, ha="right")
    plt.ylabel("Counts (%)", fontsize=18)
    plt.xlabel("Positions (nt)", fontsize=18)
    plt.title(titlos, fontsize=20)
    plt.tight_layout()
    plt.savefig(file_bar, dpi=300)

    import logomaker as lm
    crp_logo = lm.Logo(df, font_name='DejaVu Sans')
    crp_logo.style_spines(visible=False)
    crp_logo.style_spines(spines=['left', 'bottom'], visible=True)
    crp_logo.style_xticks(rotation=0, fmt='%d', anchor=0)

    # style using Axes methods
    crp_logo.ax.set_title(titlos, fontsize=18)
    crp_logo.ax.set_ylabel("Counts (%)", fontsize=16, labelpad=5)
    crp_logo.ax.set_xlabel("Positions (nt)", fontsize=16, labelpad=5)
    crp_logo.ax.xaxis.set_ticks_position('none')
    crp_logo.ax.xaxis.set_tick_params(pad=-1)
    figure = plt.gcf()
    figure.set_size_inches(6, 4)
    crp_logo.fig.savefig(file_logo, dpi=300)
3ad9701c7749 Uploaded
glogobyte
parents:
diff changeset
1247
3ad9701c7749 Uploaded
glogobyte
parents:
diff changeset
1248 ##########################################################################################################################################################################################################
3ad9701c7749 Uploaded
glogobyte
parents:
diff changeset
1249
3ad9701c7749 Uploaded
glogobyte
parents:
diff changeset
1250
3ad9701c7749 Uploaded
glogobyte
parents:
diff changeset
1251
3ad9701c7749 Uploaded
glogobyte
parents:
diff changeset
def logo_seq_non_red(merge_LH2E):
    """Draw the bar chart and sequence logo of trailing nucleotides (non-redundant).

    Each row's name (``row[0]``) contributes once, regardless of its read
    counts.  The text after the last ``_`` of the name is taken as the
    nucleotide tail; when the name lists several alternatives separated by
    ``/``, the shortest tail is used (ties broken alphabetically).  The
    first five tail positions are tallied per base; any character other
    than A, C or G is counted as T.

    Writes ``bar2.png`` and ``logo2.png`` to the current directory.

    Parameters:
        merge_LH2E: iterable of rows whose first element is the name.
            The count columns are not read here: they never influenced the
            tallies, so the per-row sum the code used to compute is gone.
    """
    positions = 5
    tally = {'A': [0] * positions, 'C': [0] * positions,
             'G': [0] * positions, 'T': [0] * positions}

    for row in merge_LH2E:
        # A name without "/" yields a single candidate, so one code path
        # serves both the single- and the multi-name case.
        tails = [name.split("_")[-1] for name in row[0].split("/")]
        tail = min(tails, key=lambda t: (len(t), t))
        for pos, base in enumerate(tail[:positions]):
            tally[base if base in ("A", "C", "G") else "T"][pos] += 1

    df = pd.DataFrame(tally, index=[1, 2, 3, 4, 5])
    bar_ax = df.plot.bar(title="Non-templated nucleotides after templated sequence",
                         color=("g", "b", "gold", "r"))
    bar_ax.set_xlabel("Positions (nt)")
    bar_ax.set_ylabel("Unique sequences")
    plt.xticks(rotation=0, ha="right")
    plt.tight_layout()
    plt.savefig("bar2.png", dpi=300)

    import logomaker as lm
    crp_logo = lm.Logo(df, font_name='DejaVu Sans')

    # style using Logo methods
    crp_logo.style_spines(visible=False)
    crp_logo.style_spines(spines=['left', 'bottom'], visible=True)
    crp_logo.style_xticks(rotation=0, fmt='%d', anchor=0)

    # style using Axes methods
    crp_logo.ax.set_ylabel("Unique sequences", labelpad=5)
    crp_logo.ax.set_xlabel("Positions (nt)", labelpad=5)
    crp_logo.ax.xaxis.set_ticks_position('none')
    crp_logo.ax.xaxis.set_tick_params(pad=-1)
    crp_logo.ax.set_title("Non-redundant")
    crp_logo.fig.savefig('logo2.png', dpi=300)
3ad9701c7749 Uploaded
glogobyte
parents:
diff changeset
1316
3ad9701c7749 Uploaded
glogobyte
parents:
diff changeset
1317
3ad9701c7749 Uploaded
glogobyte
parents:
diff changeset
1318 ###################################################################################################################################################################################################################
3ad9701c7749 Uploaded
glogobyte
parents:
diff changeset
1319
3ad9701c7749 Uploaded
glogobyte
parents:
diff changeset
def ssamples1(tem_names, tem_samp, non_names, non_samp, folder, pro):
    """Write one tab-separated count file per sample.

    For every count column of ``tem_samp`` (columns 2 and up; column ``i``
    belongs to ``tem_names[i - 2]``) a file ``<folder><name>.txt`` is
    created containing a ``miRNA id<TAB><name>`` header followed by one
    ``<row[0]><TAB><count>`` line per row of ``tem_samp``.  If the same
    sample name occurs in ``non_names`` at index ``j``, the rows of
    ``non_samp`` are appended using their column ``j + 2``.

    Parameters:
        tem_names: sample names for the columns of ``tem_samp``.
        tem_samp: rows of strings; the first row determines how many
            sample columns exist.
        non_names: sample names for the columns of ``non_samp``.
        non_samp: rows of strings laid out like ``tem_samp``.
        folder: path prefix, concatenated as-is with the file name (so it
            needs its own trailing separator).
        pro: unused; kept so existing callers keep working.
    """
    for col in range(2, len(tem_samp[0])):
        name = tem_names[col - 2]
        # The context manager closes the file even if a write fails.
        with open(folder + name + '.txt', 'w') as fp:
            fp.write("miRNA id" + "\t" + name + "\n")

            for row in tem_samp:
                fp.write("\t".join([row[0], row[col]]) + "\n")

            for j, non_name in enumerate(non_names):
                if non_name == name:
                    for row in non_samp:
                        fp.write("\t".join([row[0], row[j + 2]]) + "\n")
3ad9701c7749 Uploaded
glogobyte
parents:
diff changeset
1335
3ad9701c7749 Uploaded
glogobyte
parents:
diff changeset
1336 ###################################################################################################################################################################################################################
3ad9701c7749 Uploaded
glogobyte
parents:
diff changeset
1337
3ad9701c7749 Uploaded
glogobyte
parents:
diff changeset
def download_matures(matures, org_name):
    """Download mature miRNA sequences and keep those matching ``org_name``.

    Fetches the gzip-compressed ``mature.fa.gz`` from the miRBase FTP
    server, decodes it as UTF-8 and walks it two lines at a time, treating
    each pair as (header, sequence).  For every header that contains
    ``org_name`` as a substring, the header and its sequence are appended,
    in that order, to ``matures``.

    Parameters:
        matures: list-like with an ``append`` method; modified in place.
        org_name: text looked for inside each header line.

    Returns:
        None; results are delivered through ``matures``.
    """
    # "CURRENT" follows the latest release; a specific release can be pinned
    # by replacing it with the release number (e.g. .../mirbase/21/...).
    url = 'ftp://mirbase.org/pub/mirbase/CURRENT/mature.fa.gz'

    # Close the connection as soon as the payload has been read.
    with urllib.request.urlopen(url) as response:
        data = response.read()

    file_mirna = gzip.decompress(data).decode('utf-8').split("\n")

    # Pair even lines with the odd line that follows; a trailing unpaired
    # line (such as the empty string after the final newline) is ignored.
    for header, sequence in zip(file_mirna[::2], file_mirna[1::2]):
        if org_name in header:
            matures.append(header)
            matures.append(sequence)
3ad9701c7749 Uploaded
glogobyte
parents:
diff changeset
1351
3ad9701c7749 Uploaded
glogobyte
parents:
diff changeset
1352 ###################################################################################################################################################################################################################
3ad9701c7749 Uploaded
glogobyte
parents:
diff changeset
def non_template_ref(sc, st, all_isoforms):
    """Append not-yet-listed isoform entries to ``all_isoforms``.

    Both groups are scanned, ``sc`` first and then ``st``.  For each record,
    ``rec[2]`` is the name and ``rec[9]`` the sequence.  A record is added
    when its name contains ``")_"`` and ``">" + name`` is not already an
    element of ``all_isoforms``; the header ``">" + name`` and the sequence
    are then appended as two consecutive elements.

    Parameters:
        sc: iterable of samples (each an iterable of records), first group.
        st: iterable of samples, second group.
        all_isoforms: list-like, extended in place.

    Returns:
        None; results are delivered through ``all_isoforms``.
    """
    # Snapshot both groups up front, as before.
    groups = (list(sc), list(st))

    # Constant-time membership test instead of scanning the growing list
    # for every record.  Only strings can ever equal a header, so
    # non-string elements need not be indexed.
    known = {entry for entry in all_isoforms if isinstance(entry, str)}

    for group in groups:
        for sample in group:
            for rec in sample:
                header = ">" + rec[2]
                if header not in known and ")_" in rec[2]:
                    all_isoforms.append(header)
                    all_isoforms.append(rec[9])
                    known.add(header)
                    # Sequences are indexed as well so the index reports
                    # exactly what a scan of the whole list would report.
                    if isinstance(rec[9], str):
                        known.add(rec[9])
3ad9701c7749 Uploaded
glogobyte
parents:
diff changeset
1370
3ad9701c7749 Uploaded
glogobyte
parents:
diff changeset
1371 ################################################################################################################################################################################################
3ad9701c7749 Uploaded
glogobyte
parents:
diff changeset
1372
3ad9701c7749 Uploaded
glogobyte
parents:
diff changeset
def deseqe2(sample, mir_names, l, new_d, sample_name, sample_order):
    """Pad a sample with zero-count rows and publish it to the shared lists.

    Every entry of ``mir_names`` whose first element does not yet occur as
    the first element of a row in ``sample`` is added as
    ``[name, "0", entry[1]]``.  The rows are then sorted by their first
    element, consecutive identical rows are collapsed, and the result is
    appended to ``new_d`` together with ``sample_name`` in ``sample_order``
    while holding ``l``, so both appends happen as one unit.

    Parameters:
        sample: list of rows; padded and sorted IN PLACE.
        mir_names: iterable of ``[name, value]`` entries.
        l: lock usable as a context manager.
        new_d: list-like receiving the de-duplicated rows.
        sample_name: label appended to ``sample_order``.
        sample_order: list-like receiving ``sample_name``.

    Returns:
        None; results are delivered through ``new_d`` and ``sample_order``.
    """
    # Set lookup replaces a scan of every row per name.
    # Assumes the first element of each row is hashable - TODO confirm
    # (the padded rows built here use whatever mir_names supplies).
    present = {row[0] for row in sample}
    for entry in mir_names:
        if entry[0] not in present:
            sample.append([entry[0], "0", entry[1]])
            present.add(entry[0])

    sample.sort(key=lambda x: x[0])
    unique_rows = [row for row, _ in itertools.groupby(sample)]

    # `with` guarantees the lock is released even if an append raises.
    with l:
        new_d.append(unique_rows)
        sample_order.append(sample_name)
3ad9701c7749 Uploaded
glogobyte
parents:
diff changeset
1391
3ad9701c7749 Uploaded
glogobyte
parents:
diff changeset
1392 ###############################################################################################################################################################################################
3ad9701c7749 Uploaded
glogobyte
parents:
diff changeset
1393
3ad9701c7749 Uploaded
glogobyte
parents:
diff changeset
1394 if __name__ == '__main__':
3ad9701c7749 Uploaded
glogobyte
parents:
diff changeset
1395
3ad9701c7749 Uploaded
glogobyte
parents:
diff changeset
1396 starttime = time.time()
3ad9701c7749 Uploaded
glogobyte
parents:
diff changeset
1397
3ad9701c7749 Uploaded
glogobyte
parents:
diff changeset
1398 q1 = Queue()
3ad9701c7749 Uploaded
glogobyte
parents:
diff changeset
1399 q2 = Queue()
3ad9701c7749 Uploaded
glogobyte
parents:
diff changeset
1400 lock = Lock()
3ad9701c7749 Uploaded
glogobyte
parents:
diff changeset
1401 manager = Manager()
3ad9701c7749 Uploaded
glogobyte
parents:
diff changeset
1402
3ad9701c7749 Uploaded
glogobyte
parents:
diff changeset
1403 mature_mirnas=manager.list()
3ad9701c7749 Uploaded
glogobyte
parents:
diff changeset
1404 ps_mature=Process(target=download_matures,args=(mature_mirnas,args.org_name))
3ad9701c7749 Uploaded
glogobyte
parents:
diff changeset
1405 ps_mature.start()
3ad9701c7749 Uploaded
glogobyte
parents:
diff changeset
1406
3ad9701c7749 Uploaded
glogobyte
parents:
diff changeset
1407 args.control[0]=args.control[0][1:]
3ad9701c7749 Uploaded
glogobyte
parents:
diff changeset
1408 args.control[len(args.control)-1][:-1]
3ad9701c7749 Uploaded
glogobyte
parents:
diff changeset
1409 control = [(args.control[i:i+2]) for i in range(0, len(args.control), 2)]
3ad9701c7749 Uploaded
glogobyte
parents:
diff changeset
1410
3ad9701c7749 Uploaded
glogobyte
parents:
diff changeset
1411 args.treated[0]=args.treated[0][1:]
3ad9701c7749 Uploaded
glogobyte
parents:
diff changeset
1412 args.treated[len(args.treated)-1][:-1]
3ad9701c7749 Uploaded
glogobyte
parents:
diff changeset
1413 treated = [(args.treated[i:i+2]) for i in range(0, len(args.treated), 2)]
3ad9701c7749 Uploaded
glogobyte
parents:
diff changeset
1414
3ad9701c7749 Uploaded
glogobyte
parents:
diff changeset
1415
3ad9701c7749 Uploaded
glogobyte
parents:
diff changeset
1416 ############## Detection of templated isoforms ################
3ad9701c7749 Uploaded
glogobyte
parents:
diff changeset
1417
3ad9701c7749 Uploaded
glogobyte
parents:
diff changeset
1418 radar = manager.list([0,0,0,0])
3ad9701c7749 Uploaded
glogobyte
parents:
diff changeset
1419 samples = manager.list()
3ad9701c7749 Uploaded
glogobyte
parents:
diff changeset
1420 data= manager.list()
3ad9701c7749 Uploaded
glogobyte
parents:
diff changeset
1421 names_con=manager.list()
3ad9701c7749 Uploaded
glogobyte
parents:
diff changeset
1422 samples_mirna_names=manager.list()
3ad9701c7749 Uploaded
glogobyte
parents:
diff changeset
1423 deseq=manager.list()
3ad9701c7749 Uploaded
glogobyte
parents:
diff changeset
1424 unmap_seq=manager.Value('i',0)
3ad9701c7749 Uploaded
glogobyte
parents:
diff changeset
# Remaining manager-backed containers for the control group.
unmap_counts = manager.Value('i', 0)
LH2E_names = manager.list()
ini_c_samples = manager.list()

# Manager-backed containers for the treated group; every `sam` worker
# started below for a treated sample receives these same objects.
radar1 = manager.list([0, 0, 0, 0])
samples1 = manager.list()
data1 = manager.list()
names_tre = manager.list()
samples_mirna_names1 = manager.list()
deseq1 = manager.list()
unmap1_seq = manager.Value('i', 0)
unmap1_counts = manager.Value('i', 0)
LH8E_names = manager.list()
ini_t_samples = manager.list()

# Wait for the process that fills `mature_mirnas`, then take a plain-list
# snapshot of it before handing it to the per-sample workers.
ps_mature.join()
mature_mirnas = list(mature_mirnas)

starttime1 = time.time()

# One `sam` worker per input entry: control entries are tagged "c" and write
# into the control containers, treated entries are tagged "t" and write into
# the treated ones.  Each entry is read as path[1][:-1] and the first
# comma-separated field of path[0].
ps_sam = [
    Process(
        target=sam,
        args=(
            mature_mirnas, path[1][:-1], path[0].split(",")[0], "c", lock,
            samples, data, names_con, unmap_seq, samples_mirna_names,
            deseq, LH2E_names, "0", ini_c_samples, unmap_counts,
        ),
    )
    for path in control
]
ps_sam += [
    Process(
        target=sam,
        args=(
            mature_mirnas, path[1][:-1], path[0].split(",")[0], "t", lock,
            samples1, data1, names_tre, unmap1_seq, samples_mirna_names1,
            deseq1, LH8E_names, "0", ini_t_samples, unmap1_counts,
        ),
    )
    for path in treated
]

# Launch every worker first, then block until all of them have finished.
for worker in ps_sam:
    worker.start()
for worker in ps_sam:
    worker.join()
print('SAM took {} seconds'.format(time.time() - starttime1))

# Start one `hist_red` worker per group.  They are only started here; the
# matching join happens further down the script.
ps_hist = [
    Process(target=hist_red, args=(ini_c_samples, 'c')),
    Process(target=hist_red, args=(ini_t_samples, 't')),
]
for worker in ps_hist:
    worker.start()

starttime200 = time.time()

# Plain-list snapshots of what the `sam` workers collected.
sc = list(samples)
st = list(samples1)

names_con = list(names_con)
names_tre = list(names_tre)

# Sort the collected entries and keep one representative of each run of
# equal neighbours (i.e. drop duplicates).
samples_mirna_names = [
    entry for entry, _ in itertools.groupby(sorted(samples_mirna_names))
]
samples_mirna_names1 = [
    entry for entry, _ in itertools.groupby(sorted(samples_mirna_names1))
]

deseq = list(deseq)
deseq1 = list(deseq1)

# Output containers filled by the `deseqe2` workers.
new_names_con = manager.list()
new_names_tre = manager.list()
new_deseq = manager.list()
new_deseq1 = manager.list()

# One `deseqe2` worker per sample table; table i is paired with name i of
# the same group.
ps_deseq = [
    Process(
        target=deseqe2,
        args=(sampp, samples_mirna_names, lock, new_deseq,
              names_con[i], new_names_con),
    )
    for i, sampp in enumerate(deseq)
]
ps_deseq += [
    Process(
        target=deseqe2,
        args=(sampp, samples_mirna_names1, lock, new_deseq1,
              names_tre[i], new_names_tre),
    )
    for i, sampp in enumerate(deseq1)
]

for worker in ps_deseq:
    worker.start()
for worker in ps_deseq:
    worker.join()
new_deseq = list(new_deseq)
new_deseq1 = list(new_deseq1)

# Merge the per-sample tables into one table per group: every row starts
# with fields 0 and 2 of the first sample's row, followed by field 1 of the
# row at the same position in each sample (in sample order).
LH2E = [
    [row[0], row[2]] + [sample[idx][1] for sample in new_deseq]
    for idx, row in enumerate(new_deseq[0])
]
LH8E = [
    [row[0], row[2]] + [sample[idx][1] for sample in new_deseq1]
    for idx, row in enumerate(new_deseq1[0])
]

print('Deseq took {} seconds'.format(time.time() - starttime200))

# ---- Templated pipeline: main tables, merged names, DB files, plots ----

# Outputs of the two `merging_names` workers.
merg_nam_LH2E = manager.list()
merg_nam_LH8E = manager.list()

# `merging_names` gets its own deep copies so it never shares row objects
# with the tables handed to `main_temp`.
LH2E_copy = copy.deepcopy(LH2E)
LH8E_copy = copy.deepcopy(LH8E)

# Outputs of `main_temp` (filtered and raw tables for each group).
fil_sort_tre = manager.list()
fil_sort_con = manager.list()
raw_sort_tre = manager.list()
raw_sort_con = manager.list()

# `main_temp` runs in the background while the merge / graph steps proceed.
ps_main = Process(
    target=main_temp,
    args=(
        LH2E, samples_mirna_names, LH8E, samples_mirna_names1, 1,
        names_con, names_tre,
        fil_sort_tre, fil_sort_con, raw_sort_tre, raw_sort_con,
    ),
)
ps_main.start()

if args.anal == "2":
    # Started here, joined in the analysis-"2" section further down.
    all_iso = manager.list()
    ps_non_iso = Process(target=non_template_ref, args=(sc, st, all_iso))
    ps_non_iso.start()

ps_merge = [
    Process(target=merging_names, args=(LH2E_copy, merg_nam_LH2E)),
    Process(target=merging_names, args=(LH8E_copy, merg_nam_LH8E)),
]
for worker in ps_merge:
    worker.start()
for worker in ps_merge:
    worker.join()

merg_nam_LH2E = list(merg_nam_LH2E)
merg_nam_LH8E = list(merg_nam_LH8E)

starttime2 = time.time()
# One `DB_write` worker per collected data record of either group, plus the
# plotting workers.
procs = [Process(target=DB_write, args=(x[0], x[1], x[2], x[3], 1)) for x in data]
procs.extend(
    Process(target=DB_write, args=(x[0], x[1], x[2], x[3], 1)) for x in data1
)
procs.append(Process(target=make_spider, args=(merg_nam_LH2E, merg_nam_LH8E)))
if args.anal == "1":
    procs.append(
        Process(
            target=pie_temp,
            args=(
                merg_nam_LH2E, unmap_seq.value, unmap_counts.value,
                merg_nam_LH8E, unmap1_seq.value, unmap1_counts.value,
            ),
        )
    )

for worker in procs:
    worker.start()

if args.anal == "1":
    # `pdf_before_DE` is started only after the histogram and graph workers
    # have finished.
    for worker in ps_hist:
        worker.join()
    for worker in procs:
        worker.join()
    # `args` must be a real one-element tuple.  The previous `(args.anal)`
    # was just the string itself and only worked because Process calls
    # tuple() on it and the value happens to be a single character.
    ps_pdf = Process(target=pdf_before_DE, args=(args.anal,))
    ps_pdf.start()

print('Graphs took {} seconds'.format(time.time() - starttime2))

ps_main.join()

# Snapshot the manager proxies into plain lists.
raw_sort_con = list(raw_sort_con)
raw_sort_tre = list(raw_sort_tre)
fil_sort_con = list(fil_sort_con)
fil_sort_tre = list(fil_sort_tre)
if not fil_sort_con:
    # `main_temp` left the filtered control table empty: fall back to the
    # raw tables for both groups.  Independent copies are used so that the
    # consumers always receive plain lists (previously this path handed them
    # the manager proxy objects) and never share rows with `raw_sort_*`.
    fil_sort_con = copy.deepcopy(raw_sort_con)
    fil_sort_tre = copy.deepcopy(raw_sort_tre)

# From here on use the sample names as reported back by the `deseqe2` workers.
names_con = list(new_names_con)
names_tre = list(new_names_tre)

ps_write = Process(
    target=write_main,
    args=(raw_sort_con, raw_sort_tre, fil_sort_con, fil_sort_tre,
          names_con, names_tre, 1),
)
ps_write.start()

ps1_matrix = [
    Process(target=ssamples, args=(names_con, fil_sort_con, "Diff/temp_con/", 0)),
    Process(target=ssamples, args=(names_tre, fil_sort_tre, "Diff/temp_tre/", 0)),
]
for worker in ps1_matrix:
    worker.start()

if args.anal == "1":
    ps_pdf.join()
if args.anal == "2":
    # In this mode the graph and histogram workers have not been joined yet.
    for worker in procs:
        worker.join()
    for worker in ps_hist:
        worker.join()

ps_write.join()
for worker in ps1_matrix:
    worker.join()


############################## Detection of Both #######################################

starttime10 = time.time()

if args.anal == "2":
    # Second pass, run only for analysis mode "2": the per-sample processing
    # is repeated with `non_sam` against the mature miRNAs extended by the
    # sequences collected by `non_template_ref`.

    # Manager-backed containers for the control group ...
    n_data = manager.list()
    n_names_con = manager.list()
    n_samples_mirna_names = manager.list()
    n_deseq = manager.list()
    n_LH2E_names = manager.list()

    # ... and for the treated group.
    n_data1 = manager.list()
    n_names_tre = manager.list()
    n_samples_mirna_names1 = manager.list()
    n_deseq1 = manager.list()
    n_LH8E_names = manager.list()

    # Wait for the `non_template_ref` worker and append what it collected to
    # a copy of the reference list.
    new_mat_mirnas = list(mature_mirnas)
    ps_non_iso.join()

    all_iso = list(all_iso)
    new_mat_mirnas.extend(all_iso)

    starttime11 = time.time()

    # One `non_sam` worker per input entry, tagged "c" (control) or "t"
    # (treated) and writing into the matching group's containers.
    ps_sam = [
        Process(
            target=non_sam,
            args=(
                new_mat_mirnas, path[1][:-1], path[0].split(",")[0], "c", lock,
                n_data, n_names_con, n_deseq, n_samples_mirna_names,
                n_LH2E_names,
            ),
        )
        for path in control
    ]
    ps_sam += [
        Process(
            target=non_sam,
            args=(
                new_mat_mirnas, path[1][:-1], path[0].split(",")[0], "t", lock,
                n_data1, n_names_tre, n_deseq1, n_samples_mirna_names1,
                n_LH8E_names,
            ),
        )
        for path in treated
    ]

    for worker in ps_sam:
        worker.start()
    for worker in ps_sam:
        worker.join()

    print('Non-sam took {} seconds'.format(time.time() - starttime11))

    starttime12 = time.time()

    n_names_con = list(n_names_con)
    n_names_tre = list(n_names_tre)

    # Sort the collected entries and drop duplicates.
    n_samples_mirna_names = [
        entry for entry, _ in itertools.groupby(sorted(n_samples_mirna_names))
    ]
    n_samples_mirna_names1 = [
        entry for entry, _ in itertools.groupby(sorted(n_samples_mirna_names1))
    ]

    n_deseq = list(n_deseq)
    n_deseq1 = list(n_deseq1)

    # Output containers filled by the `deseqe2` workers.
    new_n_names_con = manager.list()
    new_n_names_tre = manager.list()
    n_new_deseq = manager.list()
    n_new_deseq1 = manager.list()

    # One `deseqe2` worker per sample table; table i is paired with name i
    # of the same group.
    ps_deseq = [
        Process(
            target=deseqe2,
            args=(sampp, n_samples_mirna_names, lock, n_new_deseq,
                  n_names_con[i], new_n_names_con),
        )
        for i, sampp in enumerate(n_deseq)
    ]
    ps_deseq += [
        Process(
            target=deseqe2,
            args=(sampp, n_samples_mirna_names1, lock, n_new_deseq1,
                  n_names_tre[i], new_n_names_tre),
        )
        for i, sampp in enumerate(n_deseq1)
    ]

    for worker in ps_deseq:
        worker.start()
    for worker in ps_deseq:
        worker.join()
    n_new_deseq = list(n_new_deseq)
    n_new_deseq1 = list(n_new_deseq1)

    # Merge the per-sample tables: each row is fields 0 and 2 of the first
    # sample's row followed by field 1 of the same row in every sample.
    n_LH2E = [
        [row[0], row[2]] + [sample[idx][1] for sample in n_new_deseq]
        for idx, row in enumerate(n_new_deseq[0])
    ]
    n_LH8E = [
        [row[0], row[2]] + [sample[idx][1] for sample in n_new_deseq1]
        for idx, row in enumerate(n_new_deseq1[0])
    ]

    print('Non-deseq took {} seconds'.format(time.time() - starttime12))

    # Outputs of the two `merging_names` workers.
    merg_nam_n_LH2E = manager.list()
    merg_nam_n_LH8E = manager.list()

    # Deep copies so `merging_names` never shares rows with `main_temp`'s input.
    n_LH2E_copy = copy.deepcopy(n_LH2E)
    n_LH8E_copy = copy.deepcopy(n_LH8E)

    # NOTE(review): these two lists are not referenced again in the visible
    # part of the script -- confirm they are unused before removing them.
    n_sort_tre = manager.list()
    n_sort_con = manager.list()

    # Outputs of `main_temp` (filtered and raw tables for each group).
    n_fil_sort_con = manager.list()
    n_fil_sort_tre = manager.list()
    n_raw_sort_con = manager.list()
    n_raw_sort_tre = manager.list()

    ps_main = Process(
        target=main_temp,
        args=(
            n_LH2E, n_samples_mirna_names, n_LH8E, n_samples_mirna_names1, 1,
            n_names_con, n_names_tre,
            n_fil_sort_tre, n_fil_sort_con, n_raw_sort_tre, n_raw_sort_con,
        ),
    )
    ps_main.start()

    ps_merge = [
        Process(target=merging_names, args=(n_LH2E_copy, merg_nam_n_LH2E)),
        Process(target=merging_names, args=(n_LH8E_copy, merg_nam_n_LH8E)),
    ]
    for worker in ps_merge:
        worker.start()
    for worker in ps_merge:
        worker.join()

    merg_nam_n_LH2E = list(merg_nam_n_LH2E)
    merg_nam_n_LH8E = list(merg_nam_n_LH8E)

    # One `DB_write` worker per collected data record, plus plotting workers.
    procs = [Process(target=DB_write, args=(x[0], x[1], x[2], x[3], 2)) for x in n_data]
    procs.extend(
        Process(target=DB_write, args=(x[0], x[1], x[2], x[3], 2)) for x in n_data1
    )
    procs.append(Process(target=logo_seq_red, args=(merg_nam_n_LH2E, 'c')))
    procs.append(Process(target=logo_seq_red, args=(merg_nam_n_LH8E, 't')))
    procs.append(
        Process(
            target=pie_non_temp,
            args=(
                merg_nam_LH2E, merg_nam_n_LH2E, merg_nam_LH8E, merg_nam_n_LH8E,
                unmap_seq.value, unmap1_seq.value,
                unmap_counts.value, unmap1_counts.value,
            ),
        )
    )

    starttime13 = time.time()
    for worker in procs:
        worker.start()
    for worker in procs:
        worker.join()

    print('Graphs took {} seconds'.format(time.time() - starttime13))

    # `args` must be a real one-element tuple.  The previous `(args.anal)`
    # was just the string itself and only worked because Process calls
    # tuple() on it and the value happens to be a single character.
    procs1 = Process(target=pdf_before_DE, args=(args.anal,))
    procs1.start()

    starttime14 = time.time()
    ps_main.join()

    # Snapshot the manager proxies into plain lists.
    n_raw_sort_con = list(n_raw_sort_con)
    n_raw_sort_tre = list(n_raw_sort_tre)
    n_fil_sort_con = list(n_fil_sort_con)
    n_fil_sort_tre = list(n_fil_sort_tre)
    if not n_fil_sort_con:
        # `main_temp` left the filtered control table empty: fall back to
        # the raw tables for both groups.  Independent copies keep the
        # consumers on plain lists (previously this path handed them the
        # manager proxy objects) without sharing rows with `n_raw_sort_*`.
        n_fil_sort_con = copy.deepcopy(n_raw_sort_con)
        n_fil_sort_tre = copy.deepcopy(n_raw_sort_tre)

    # Use the sample names as reported back by the `deseqe2` workers.
    n_names_con = list(new_n_names_con)
    n_names_tre = list(new_n_names_tre)

    ps_write = Process(
        target=write_main,
        args=(n_raw_sort_con, n_raw_sort_tre, n_fil_sort_con, n_fil_sort_tre,
              n_names_con, n_names_tre, 2),
    )
    ps_write.start()

    # `ssamples1` receives both the second-pass tables and the tables
    # produced by the first (templated) pass.
    ps1_matrix = [
        Process(
            target=ssamples1,
            args=(n_names_con, n_fil_sort_con, names_con, fil_sort_con,
                  "Diff/n_temp_con/", 0),
        ),
        Process(
            target=ssamples1,
            args=(n_names_tre, n_fil_sort_tre, names_tre, fil_sort_tre,
                  "Diff/n_temp_tre/", 0),
        ),
    ]
    for worker in ps1_matrix:
        worker.start()

    ps_write.join()
    for worker in ps1_matrix:
        worker.join()
    procs1.join()

# NOTE(review): the original indentation of the next line could not be
# recovered; it is assumed to sit outside the `if` because `starttime10` is
# set outside it -- confirm.
print('That took {} seconds'.format(time.time() - starttime10))
# Total run time, measured from `starttime` set at the start of the script.
print('That took {} seconds'.format(time.time() - starttime))