Mercurial > repos > glogobyte > viztool
comparison viz_ultra.py @ 15:6db3bd727fde draft
Uploaded
author | glogobyte |
---|---|
date | Wed, 28 Oct 2020 07:34:56 +0000 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
14:e51ebc767701 | 15:6db3bd727fde |
---|---|
1 import argparse | |
2 from viz_graphs import * | |
3 import sys | |
4 import pandas as pd | |
5 import matplotlib.pyplot as plt | |
6 import matplotlib.patches as mpatches | |
7 import matplotlib.font_manager as font_manager | |
8 import time | |
9 from multiprocessing import Process, Queue, Lock, Pool, Manager, Value | |
10 | |
11 | |
12 ################################################################################################################################################################################################################## | |
13 | |
def top_diff(miRNA_info, number, flag, l=None):
    """Draw a horizontal bar chart of the largest log2 fold changes.

    The ``number`` records with the largest absolute value in position 1 are
    kept, ordered by the name in position 0, and plotted as horizontal bars:
    green for positive values, red for negative values, black otherwise.

    Args:
        miRNA_info: list of ``[name, log2fc, ...]`` records whose second item
            is numeric. The list is not modified.
        number: maximum number of records to plot.
        flag: ``'t'`` saves the chart as ``tem.png``, ``'nt'`` saves it as
            ``non.png``; any other value draws the chart without saving it.
        l: unused by this function. It is optional so that callers passing
            only three arguments do not fail with ``TypeError``.

    Returns:
        None. Nothing is drawn or saved when there is no record to plot.
    """
    # Work on a sorted copy so the caller's list keeps its order.
    strongest = sorted(miRNA_info, key=lambda rec: abs(rec[1]), reverse=True)[:number]
    strongest.sort(key=lambda rec: rec[0])

    def bar_colour(value):
        # Anything that is neither > 0 nor < 0 is drawn in black.
        if value > 0:
            return 'g'
        if value < 0:
            return 'r'
        return 'k'

    df_miRNA = pd.DataFrame(data={
        "Names": [rec[0] for rec in strongest],
        "Log2FC": [rec[1] for rec in strongest],
        "Colour": [bar_colour(rec[1]) for rec in strongest],
    })
    df_miRNA = df_miRNA.sort_values(by=['Names'])

    # NOTE(review): the extracted source lost its indentation; saving is
    # assumed to belong to the "not empty" branch, so no file is written for
    # an empty selection - confirm against the repository copy.
    if df_miRNA.empty:
        return

    axes = df_miRNA.plot.barh(x='Names', y='Log2FC', color=df_miRNA["Colour"])
    figure = plt.gcf()
    figure.set_size_inches(5, 12)  # tall, narrow canvas (width x height in inches)

    up_reg = mpatches.Patch(color='green', label='Upregulated')
    down_reg = mpatches.Patch(color='red', label='Downregulated')
    # Legend is anchored just outside the right edge of the axes.
    plt.legend(handles=[up_reg, down_reg], bbox_to_anchor=(1.04, 0.5), loc="center left", borderaxespad=0)

    axes.set_ylabel(" ", fontsize=3, fontweight='bold')
    axes.set_xlabel("Log2FC", fontsize=12, fontweight='bold')
    plt.axvline(x=0, color="k")

    plt.grid(axis='y', linewidth=0.2)
    plt.grid(axis='x', linewidth=0.2)

    if flag == 't':
        plt.savefig('tem.png', bbox_inches='tight', dpi=300)
    if flag == 'nt':
        plt.savefig('non.png', bbox_inches='tight', dpi=300)
54 | |
55 #################################################################################################################################################################################################################### | |
56 | |
def unique(sequence):
    """Return the items of ``sequence`` without duplicates, in first-seen order.

    Args:
        sequence: any iterable of hashable items.

    Returns:
        A new list holding each distinct item once, at the position of its
        first occurrence.
    """
    seen = set()
    ordered = []
    for item in sequence:
        if item not in seen:
            seen.add(item)
            ordered.append(item)
    return ordered
60 | |
61 ########################################################################################################################################################################################################################################################################### | |
62 | |
def top_scatter_non(matures, isoforms, non_temp, uni_names, number):
    """Scatter-plot log2FC per miRNA name for three record groups; save ``a2.png``.

    Walks ``uni_names`` in order and, for each name, collects the log2FC
    (position 1) of every record in ``matures``, ``isoforms`` and ``non_temp``
    whose identifier (position 0) starts with that name followed by an
    underscore, or equals it. Collection stops once ``number`` names have
    produced at least one point.

    Args:
        matures: ``[identifier, log2fc, ...]`` records, drawn in green and
            labelled "Reference miRNA".
        isoforms: records of the same shape, drawn in red and labelled
            "Template isomiRs".
        non_temp: records of the same shape, drawn in blue and labelled
            "Non-template".
        uni_names: miRNA names (identifier text before the first underscore)
            in the order they should be considered.
        number: maximum number of names to plot.

    Returns:
        None. The figure is written to ``a2.png`` in the working directory.
    """
    def values_for(base_name, records):
        # Exact match on the part before the first underscore; a substring
        # test would also pick up e.g. "miR-10a" when asked for "miR-1".
        return [rec[1] for rec in records if rec[0].split("_")[0] == base_name]

    mat_names, mat_log2fc = [], []
    iso_names, iso_log2fc = [], []
    non_temp_names, non_temp_log2fc = [], []
    targets = (
        (matures, mat_names, mat_log2fc),
        (isoforms, iso_names, iso_log2fc),
        (non_temp, non_temp_names, non_temp_log2fc),
    )

    plotted = 0
    for base_name in uni_names:
        if plotted >= number:
            break  # enough names collected; the rest cannot contribute
        found = False
        for records, names, values in targets:
            hits = values_for(base_name, records)
            if hits:
                names.extend([base_name] * len(hits))
                values.extend(hits)
                found = True
        if found:
            plotted += 1

    fig, ax = plt.subplots()

    # The points are drawn in collection order (that of uni_names); no
    # alphabetical re-sorting is applied.
    h3 = ax.scatter(iso_log2fc, iso_names, edgecolors='k', linewidth=1, marker='o', c='red')
    h1 = ax.scatter(mat_log2fc, mat_names, edgecolors='k', linewidth=1, marker='o', c='green')
    h2 = ax.scatter(non_temp_log2fc, non_temp_names, edgecolors='k', linewidth=1, marker='o', c='blue')

    # Legend is anchored just outside the right edge of the axes.
    plt.legend([h1, h2, h3], ["Reference miRNA", "Non-template", "Template isomiRs"], bbox_to_anchor=(1.04, 0.5), loc="center left", borderaxespad=0)
    plt.axvline(x=0, color="k")
    plt.grid(axis='y', linewidth=0.2)
    plt.grid(axis='x', linewidth=0.2)
    plt.xlabel("Log2FC", fontsize=12, fontweight='bold')
    plt.yticks(rotation=0, ha="right", fontsize=10)
    plt.xticks(rotation=0, ha="right", fontsize=10)
    plt.tight_layout()
    figure = plt.gcf()
    figure.set_size_inches(16, 12)  # width x height in inches
    plt.savefig('a2.png', bbox_inches='tight', dpi=300)
121 | |
122 ######################################################################################################################################################################################################################################### | |
def top_scatter_tem(matures, isoforms, uni_names, number):
    """Scatter-plot log2FC per miRNA name for two record groups; save ``a2.png``.

    Walks ``uni_names`` in order and, for each name, collects the log2FC
    (position 1) of every record in ``matures`` and ``isoforms`` whose
    identifier (position 0) starts with that name followed by an underscore,
    or equals it. Collection stops once ``number`` names have produced at
    least one point.

    Args:
        matures: ``[identifier, log2fc, ...]`` records, drawn in green and
            labelled "Reference miRNA".
        isoforms: records of the same shape, drawn in red and labelled
            "Template isomiRs".
        uni_names: miRNA names (identifier text before the first underscore)
            in the order they should be considered.
        number: maximum number of names to plot.

    Returns:
        None. The figure is written to ``a2.png`` in the working directory.
    """
    def values_for(base_name, records):
        # Exact match on the part before the first underscore; a substring
        # test would also pick up e.g. "miR-10a" when asked for "miR-1".
        return [rec[1] for rec in records if rec[0].split("_")[0] == base_name]

    mat_names, mat_log2fc = [], []
    iso_names, iso_log2fc = [], []
    targets = (
        (matures, mat_names, mat_log2fc),
        (isoforms, iso_names, iso_log2fc),
    )

    plotted = 0
    for base_name in uni_names:
        if plotted >= number:
            break  # enough names collected; the rest cannot contribute
        found = False
        for records, names, values in targets:
            hits = values_for(base_name, records)
            if hits:
                names.extend([base_name] * len(hits))
                values.extend(hits)
                found = True
        if found:
            plotted += 1

    fig, ax = plt.subplots()

    # The points are drawn in collection order (that of uni_names); no
    # alphabetical re-sorting is applied.
    h3 = ax.scatter(iso_log2fc, iso_names, edgecolors='k', linewidth=1, marker='o', c='red')
    h1 = ax.scatter(mat_log2fc, mat_names, edgecolors='k', linewidth=1, marker='o', c='green')

    # Legend is anchored just outside the right edge of the axes.
    plt.legend([h1, h3], ["Reference miRNA", "Template isomiRs"], bbox_to_anchor=(1.04, 0.5), loc="center left", borderaxespad=0)
    plt.axvline(x=0, color="k")
    plt.grid(axis='y', linewidth=0.2)
    plt.grid(axis='x', linewidth=0.2)
    plt.xlabel("Log2FC", fontsize=12, fontweight='bold')
    plt.yticks(rotation=0, ha="right", fontsize=10)
    plt.xticks(rotation=0, ha="right", fontsize=10)
    plt.tight_layout()
    figure = plt.gcf()
    figure.set_size_inches(16, 12)  # width x height in inches
    plt.savefig('a2.png', bbox_inches='tight', dpi=300)
170 | |
171 | |
172 ############################################################################################################################################################################################################################################## | |
def preproccess(non_templated, matures, isoforms, log2fc, pval):
    """Filter the three record groups by fold change and p-value.

    Each input record is ``[identifier, log2fc, p_value, ...]`` with the two
    numeric fields given as anything ``float()`` accepts. A record is kept
    when ``abs(log2fc) > cutoff`` and ``p_value < pval``.

    Args:
        non_templated: records of the non-templated group.
        matures: records of the mature group.
        isoforms: records of the isoform group.
        log2fc: absolute log2 fold-change cutoff for the main selection.
        pval: p-value cutoff (exclusive upper bound).

    Returns:
        A 6-tuple ``(diff_matures, diff_isoforms, diff_non_templated,
        uni_names, non_temp, mat_iso)``:

        * ``uni_names`` - distinct identifier prefixes (text before the first
          underscore) of all records passing the ``log2fc`` cutoff, ordered
          by decreasing absolute fold change;
        * ``non_temp`` - non-templated records passing the ``log2fc`` cutoff,
          ordered by decreasing absolute fold change;
        * ``mat_iso`` - mature then isoform records passing the ``log2fc``
          cutoff, each group in input order;
        * ``diff_*`` - records of each group with absolute fold change above
          1 and ``p_value < pval`` whose prefix is in ``uni_names``, ordered
          by decreasing absolute fold change.

        All returned records are ``[identifier, float, float]``.

    Raises:
        SystemExit: when no record in any group passes the ``log2fc`` cutoff.
    """
    # NOTE(review): the diff_* lists use a fixed cutoff of 1 rather than the
    # ``log2fc`` argument; kept as in the original - confirm it is intended.
    diff_cutoff = 1

    def effect_size(rec):
        return abs(rec[1])

    def significant(records, fc_cutoff):
        kept = []
        for rec in records:
            fold = float(rec[1])
            if not abs(fold) > fc_cutoff:
                continue  # the p-value is only parsed once the fold change passes
            p_value = float(rec[2])
            if p_value < pval:
                kept.append([rec[0], fold, p_value])
        return kept

    non_temp = significant(non_templated, log2fc)
    mat = significant(matures, log2fc)
    iso = significant(isoforms, log2fc)
    mat_iso = mat + iso

    if not (non_temp or mat or iso):
        sys.exit("There aren't entries which meet these criteria")

    non_temp.sort(key=effect_size, reverse=True)

    # The sort is stable, so ties keep the mature -> isoform -> non-templated
    # order (and input order within a group).
    ranked = sorted(mat + iso + non_temp, key=effect_size, reverse=True)
    uni_names = unique([rec[0].split("_")[0] for rec in ranked])
    wanted = set(uni_names)  # constant-time membership for the filters below

    def differential(records):
        hits = [rec for rec in significant(records, diff_cutoff)
                if rec[0].split("_")[0] in wanted]
        hits.sort(key=effect_size, reverse=True)
        return hits

    diff_non_templated = differential(non_templated)
    diff_matures = differential(matures)
    diff_isoforms = differential(isoforms)

    return diff_matures, diff_isoforms, diff_non_templated, uni_names, non_temp, mat_iso
201 | |
202 ################################################################################################################################################################################################################################################################## | |
def _split_by_kind(rows, fc_col, p_col):
    """Split table rows into non-templated, mature and isoform records.

    Rows whose fold-change or p-value cell is the string ``"NA"`` are dropped.
    A row is non-templated when its identifier (column 0) contains ``"__"``;
    otherwise it is mature when the text after the last underscore contains
    ``"chr"``, and an isoform when it does not.

    Returns three lists of ``[identifier, fold_change_cell, p_value_cell]``.
    """
    non_templated, matures, isoforms = [], [], []
    for row in rows:
        name = row[0]
        if row[fc_col] == "NA" or row[p_col] == "NA":
            continue
        record = [name, row[fc_col], row[p_col]]
        if "__" in name:
            non_templated.append(record)
        elif 'chr' in name.split("_")[-1]:
            matures.append(record)
        else:
            isoforms.append(record)
    return non_templated, matures, isoforms


def main():
    """Parse the command line, draw the plots in worker processes, build the PDF."""
    starttime = time.time()

    parser = argparse.ArgumentParser()
    parser.add_argument("-in", "--input", help="tab-separated differential expression table", action="store", required=True)
    parser.add_argument("-p_value", "--pval", help="p-value cutoff (records must be below it)", action="store", required=True)
    parser.add_argument("-fc", "--log2fc", help="absolute log2 fold-change cutoff", action="store", required=True)
    parser.add_argument("-top", "--top_mirnas", help="number of top miRNAs to plot", action="store", required=True)
    parser.add_argument("-tool_dir", "--tool_directory", help="tool directory path", action="store")
    parser.add_argument("-statistic", "--stat", help='"1" reads the first p-value column, any other value the column after it', action="store")
    parser.add_argument("-diff_tool", "--tool", help='tool that produced the input: "1" for DESeq, "2" for EdgeR', action="store", required=True)

    args = parser.parse_args()

    l = Lock()
    number = int(args.top_mirnas)
    log2fc = float(args.log2fc)
    pval = float(args.pval)

    use_first_p_column = args.stat == "1"
    if args.tool == "2":
        # EdgeR table: first row is skipped, fold change in column 1.
        fc_col = 1
        p_col = 4 if use_first_p_column else 5
        skip_first_row = True
    elif args.tool == "1":
        # DESeq table: every row is used, fold change in column 2.
        fc_col = 2
        p_col = 5 if use_first_p_column else 6
        skip_first_row = False
    else:
        parser.error('-diff_tool must be "1" (DESeq) or "2" (EdgeR)')

    rows = [line.rstrip("\n").split("\t") for line in read(args.input, 0)]
    if skip_first_row:
        del rows[0]

    non_templated, matures, isoforms = _split_by_kind(rows, fc_col, p_col)

    diff_matures, diff_isoforms, diff_non_templated, names, non_temp, mat_iso = preproccess(non_templated, matures, isoforms, log2fc, pval)

    if non_templated:
        analysis = "2"
        jobs = [
            Process(target=top_diff, args=(non_temp, number, "nt", l)),
            Process(target=top_diff, args=(mat_iso, number, "t", l)),
            Process(target=top_scatter_non, args=(diff_matures, diff_isoforms, diff_non_templated, names, number)),
        ]
    else:
        analysis = "1"
        jobs = [
            # top_diff takes four positional arguments, so the lock is passed here too.
            Process(target=top_diff, args=(mat_iso, number, "t", l)),
            Process(target=top_scatter_tem, args=(diff_matures, diff_isoforms, names, number)),
        ]

    for job in jobs:
        job.start()
    for job in jobs:
        job.join()

    pdf_after_DE(analysis, args.top_mirnas)

    print('That took {} seconds'.format(time.time() - starttime))


# Guard required by multiprocessing: with the "spawn" start method the main
# module is re-imported in every child and must not launch processes again.
if __name__ == "__main__":
    main()
270 |