comparison viz_functions.py @ 26:6528239cedf0 draft

Uploaded
author glogobyte
date Sun, 05 Dec 2021 13:29:46 +0000
parents
children
comparison
equal deleted inserted replaced
25:a85854b520c9 26:6528239cedf0
1 import pandas as pd
2 import matplotlib.patches as mpatches
3 import matplotlib.font_manager as font_manager
4 import matplotlib.pyplot as plt
5 import sys
6
7 #########################################################################################
8
9 # Read a file and return it as a list
def read(path, flag):
    """Read a text file and return its lines as a list.

    Args:
        path: Path of the file to read.
        flag: ``0`` keeps the line terminators (``readlines``);
            ``1`` strips them (``read().splitlines()``).

    Returns:
        A list of strings, or ``None`` when ``flag`` is neither 0 nor 1
        (in that case the file is not opened at all).
    """
    if flag not in (0, 1):
        # Unknown modes have always fallen through and returned None;
        # keep that contract, but make it explicit.
        return None

    # The context manager closes the file, so no explicit close() is needed.
    with open(path) as fp:
        if flag == 0:
            return fp.readlines()
        return fp.read().splitlines()
22
23 # Write a list to a txt file
def write(path, list):
    """Write the inner fields of each record to a text file.

    For every element ``x`` of ``list`` the slice ``x[1:-1]`` (everything
    except the first and last item) is joined with tabs and written to
    ``path``. No line terminator is added between records, so any newline
    has to be part of the joined fields themselves.

    Args:
        path: Destination file; it is created or truncated.
        list: Iterable of sequences of strings. The parameter name shadows
            the builtin but is kept so existing keyword callers still work.
    """
    # The context manager closes the file, so no explicit close() is needed.
    with open(path, 'w') as fp:
        fp.writelines("\t".join(x[1:-1]) for x in list)
29
30
31 ################################################################################################################################################################>
32
def top_diff(miRNA_info, number, flag, l):
    """Draw a horizontal bar chart of the entries with the largest |Log2FC|.

    The ``number`` entries with the largest absolute value in position 1
    are selected, ordered by the name in position 0 and plotted as bars:
    green for positive values, red for negative ones and black for
    anything else (zero or NaN).

    Args:
        miRNA_info: Sequence of records whose item 0 is a name and item 1
            a numeric Log2FC. The sequence is not modified.
        number: Maximum number of records to plot.
        flag: ``'t'`` saves the figure as ``tem.png``, ``'nt'`` saves it
            as ``non.png``; any other value saves nothing.
        l: Unused; kept for interface compatibility.

    Returns:
        None. When no record is selected nothing is plotted or saved.

    NOTE(review): indentation was lost in the reviewed copy of this file;
    the grid and savefig calls are assumed to belong to the non-empty
    branch -- confirm against the original source.
    """
    # sorted() works on a copy, so the caller's list keeps its order.
    top = sorted(miRNA_info, key=lambda x: abs(x[1]), reverse=True)[:number]
    top.sort(key=lambda x: x[0])

    kind = []
    for entry in top:
        if entry[1] > 0:
            kind.append(True)
        elif entry[1] < 0:
            kind.append(False)
        else:
            kind.append("Zero")

    top_miRNA = {"Names": [entry[0] for entry in top],
                 "Log2FC": [entry[1] for entry in top],
                 "Kind": kind}

    df_miRNA = pd.DataFrame(data=top_miRNA)
    df_miRNA = df_miRNA.sort_values(by=['Names'])
    if df_miRNA.empty:
        return

    h1 = df_miRNA.plot.barh(x='Names', y='Log2FC',
                            color=df_miRNA.Kind.map({True: 'g', False: 'r', 'Zero': 'k'}))
    figure = plt.gcf()  # the figure pandas has just drawn on
    figure.set_size_inches(5, 12)
    up_reg = mpatches.Patch(color='green', label='Upregulated')
    down_reg = mpatches.Patch(color='red', label='Downregulated')
    # Place the legend outside the axes, to the right of the bars.
    plt.legend(handles=[up_reg, down_reg], bbox_to_anchor=(1.04, 0.5),
               loc="center left", borderaxespad=0)
    h1.set_ylabel(" ", fontsize=3, fontweight='bold')
    h1.set_xlabel("Log2FC", fontsize=12, fontweight='bold')
    plt.axvline(x=0, color="k")

    plt.grid(axis='y', linewidth=0.2)
    plt.grid(axis='x', linewidth=0.2)
    if flag == 't':
        plt.savefig('tem.png', bbox_inches='tight', dpi=300)
    if flag == 'nt':
        plt.savefig('non.png', bbox_inches='tight', dpi=300)
73
74
75 ################################################################################################################################################################>
76
def unique(sequence):
    """Return the distinct items of ``sequence`` in first-seen order.

    Args:
        sequence: Any iterable of hashable items.

    Returns:
        A new list with duplicates removed; the first occurrence of each
        item keeps its position.
    """
    # dict keys keep insertion order, which gives an order-preserving
    # de-duplication without a side effect hidden in a comprehension filter.
    return list(dict.fromkeys(sequence))
80
81 ################################################################################################################################################################>
82
def top_scatter_non(matures, isoforms, non_temp, uni_names, number):
    """Scatter-plot Log2FC per miRNA for RefSeq, templated and non-templated entries.

    Walks ``uni_names`` in order and, for the first ``number`` names that
    have at least one entry in any of the three inputs, collects the
    Log2FC values of every matching entry. The three groups are drawn as
    green (``matures``), red (``isoforms``) and orange (``non_temp``)
    points and the figure is saved as ``a2.png``.

    Args:
        matures: Records whose item 0 is a label and item 1 a Log2FC.
        isoforms: Records of the same layout.
        non_temp: Records of the same layout.
        uni_names: miRNA names, in the order they should be considered.
        number: Maximum number of names to include.

    Returns:
        None.
    """
    def _belongs(label, name):
        # Compare the part of the label before the first "_" with the
        # name. A plain substring test would let one name also pick up
        # the entries of every longer name that contains it.
        return label == name or label.startswith(name + "_")

    def _values_for(entries, name):
        return [entry[1] for entry in entries if _belongs(entry[0], name)]

    mat_names, mat_log2fc = [], []
    iso_names, iso_log2fc = [], []
    non_temp_names, non_temp_log2fc = [], []

    count = 0
    for name in uni_names:
        if count >= number:
            break  # enough names collected; nothing further would be added

        mat_hits = _values_for(matures, name)
        iso_hits = _values_for(isoforms, name)
        non_hits = _values_for(non_temp, name)

        mat_log2fc.extend(mat_hits)
        mat_names.extend([name] * len(mat_hits))
        iso_log2fc.extend(iso_hits)
        iso_names.extend([name] * len(iso_hits))
        non_temp_log2fc.extend(non_hits)
        non_temp_names.extend([name] * len(non_hits))

        if mat_hits or iso_hits or non_hits:
            count += 1

    mat_df = pd.DataFrame(dict(names=mat_names, log2fc=mat_log2fc))
    iso_df = pd.DataFrame(dict(names=iso_names, log2fc=iso_log2fc))
    non_df = pd.DataFrame(dict(names=non_temp_names, log2fc=non_temp_log2fc))

    # sort_values returns a new frame; the result has to be assigned for
    # the ordering to take effect.
    iso_df = iso_df.sort_values(by=['names'])
    mat_df = mat_df.sort_values(by=['names'])
    non_df = non_df.sort_values(by=['names'])

    fig, ax = plt.subplots()

    h3 = ax.scatter(iso_df['log2fc'], iso_df['names'], edgecolors='k', linewidth=1, marker='o', c='red', alpha=0.4)
    h1 = ax.scatter(mat_df['log2fc'], mat_df['names'], edgecolors='k', linewidth=1, marker='o', c='green', alpha=0.4)
    h2 = ax.scatter(non_df['log2fc'], non_df['names'], edgecolors='k', linewidth=1, marker='o', c='orange', alpha=0.4)

    # Place the legend outside the axes, to the right of the plot.
    plt.legend([h1, h2, h3], ["RefSeq miRNA", "Non-templated isomiR", "Templated isomiR"],
               bbox_to_anchor=(1.04, 0.5), loc="center left", borderaxespad=0)
    plt.axvline(x=0, color="k")
    plt.grid(axis='y', linewidth=0.2)
    plt.grid(axis='x', linewidth=0.2)
    plt.xlabel("Log2FC", fontsize=12, fontweight='bold')
    plt.yticks(rotation=0, ha="right", fontsize=10)
    plt.xticks(rotation=0, ha="right", fontsize=10)
    plt.tight_layout()
    fig.set_size_inches(16, 12)
    plt.savefig('a2.png', bbox_inches='tight', dpi=300)
141
142 #########################################################################################################################################################################################################################################
143
def top_scatter_tem(matures, isoforms, uni_names, number):
    """Scatter-plot Log2FC per miRNA for RefSeq and templated entries.

    Walks ``uni_names`` in order and, for the first ``number`` names that
    have at least one entry in either input, collects the Log2FC values
    of every matching entry. ``matures`` are drawn as green points,
    ``isoforms`` as red points, and the figure is saved as ``a2.png``.

    Args:
        matures: Records whose item 0 is a label and item 1 a Log2FC.
        isoforms: Records of the same layout.
        uni_names: miRNA names, in the order they should be considered.
        number: Maximum number of names to include.

    Returns:
        None.
    """
    def _belongs(label, name):
        # Compare the part of the label before the first "_" with the
        # name. A plain substring test would let one name also pick up
        # the entries of every longer name that contains it.
        return label == name or label.startswith(name + "_")

    def _values_for(entries, name):
        return [entry[1] for entry in entries if _belongs(entry[0], name)]

    mat_names, mat_log2fc = [], []
    iso_names, iso_log2fc = [], []

    count = 0
    for name in uni_names:
        if count >= number:
            break  # enough names collected; nothing further would be added

        mat_hits = _values_for(matures, name)
        iso_hits = _values_for(isoforms, name)

        mat_log2fc.extend(mat_hits)
        mat_names.extend([name] * len(mat_hits))
        iso_log2fc.extend(iso_hits)
        iso_names.extend([name] * len(iso_hits))

        if mat_hits or iso_hits:
            count += 1

    mat_df = pd.DataFrame(dict(names=mat_names, log2fc=mat_log2fc))
    iso_df = pd.DataFrame(dict(names=iso_names, log2fc=iso_log2fc))

    # sort_values returns a new frame; the result has to be assigned for
    # the ordering to take effect.
    iso_df = iso_df.sort_values(by=['names'])
    mat_df = mat_df.sort_values(by=['names'])

    fig, ax = plt.subplots()

    h3 = ax.scatter(iso_df['log2fc'], iso_df['names'], edgecolors='k', linewidth=1, marker='o', c='red', alpha=0.4)
    h1 = ax.scatter(mat_df['log2fc'], mat_df['names'], edgecolors='k', linewidth=1, marker='o', c='green', alpha=0.4)

    # Place the legend outside the axes, to the right of the plot.
    plt.legend([h1, h3], ["RefSeq miRNA", "Templated isomiR"],
               bbox_to_anchor=(1.04, 0.5), loc="center left", borderaxespad=0)
    plt.axvline(x=0, color="k")
    plt.grid(axis='y', linewidth=0.2)
    plt.grid(axis='x', linewidth=0.2)
    plt.xlabel("Log2FC", fontsize=12, fontweight='bold')
    plt.yticks(rotation=0, ha="right", fontsize=10)
    plt.xticks(rotation=0, ha="right", fontsize=10)
    plt.tight_layout()
    fig.set_size_inches(16, 12)
    plt.savefig('a2.png', bbox_inches='tight', dpi=300)
191
192
193 ##############################################################################################################################################################################################################################################
194
def preproccess(non_templated, matures, isoforms, log2fc, pval, stat):
    """Filter the three result tables and derive the miRNA names to plot.

    Each input row is read as ``[label, log2fc, statistic]``; items 1 and
    2 are converted with ``float``. A row passes the primary filter when
    ``abs(log2fc)`` exceeds the ``log2fc`` threshold and its statistic is
    greater than ``pval`` (when ``stat == "3"``) or less than ``pval``
    (any other ``stat``).

    Args:
        non_templated: Rows for non-templated isomiRs.
        matures: Rows for mature miRNAs.
        isoforms: Rows for templated isomiRs.
        log2fc: Threshold on the absolute Log2FC for the primary filter.
        pval: Threshold on the statistic in item 2.
        stat: ``"3"`` keeps rows whose statistic is above ``pval``;
            anything else keeps rows below it.

    Returns:
        A tuple ``(diff_matures, diff_isoforms, diff_non_templated,
        uni_names, non_temp, mat_iso)`` where

        * ``uni_names`` are the distinct label prefixes (text before the
          first ``"_"``) of all rows passing the primary filter, ordered
          by decreasing ``abs(log2fc)``;
        * the ``diff_*`` lists hold the rows of each input with
          ``abs(log2fc) > 1``, statistic below ``pval`` and a prefix in
          ``uni_names``, sorted by decreasing ``abs(log2fc)``;
        * ``non_temp`` holds the non-templated rows passing the primary
          filter, sorted by decreasing ``abs(log2fc)``;
        * ``mat_iso`` is the concatenation of the filtered mature and
          isoform rows in input order (it is built before any sorting).

    Raises:
        SystemExit: If no row of any input passes the primary filter.
    """
    if stat == "3":
        def passes_stat(value):
            return value > pval
    else:
        def passes_stat(value):
            return value < pval

    def below_pval(value):
        return value < pval

    def select(rows, min_abs_fc, stat_ok, allowed=None):
        # Item 2 is parsed only for rows that already pass the fold-change
        # test, so an unparsable statistic on a low-fold-change row is
        # never touched. The ``not a > b`` form keeps NaN values out.
        picked = []
        for row in rows:
            fold = float(row[1])
            if not abs(fold) > min_abs_fc:
                continue
            score = float(row[2])
            if not stat_ok(score):
                continue
            if allowed is not None and row[0].split("_")[0] not in allowed:
                continue
            picked.append([row[0], fold, score])
        return picked

    def by_abs_fold(row):
        return abs(row[1])

    non_temp = select(non_templated, log2fc, passes_stat)
    mat = select(matures, log2fc, passes_stat)
    iso = select(isoforms, log2fc, passes_stat)

    mat_iso = mat + iso

    if not non_temp and not mat and not iso:
        sys.exit("There aren't entries which meet these criteria")

    mat.sort(key=by_abs_fold, reverse=True)
    iso.sort(key=by_abs_fold, reverse=True)
    non_temp.sort(key=by_abs_fold, reverse=True)

    combined = mat + iso + non_temp
    combined.sort(key=by_abs_fold, reverse=True)
    names = [row[0].split("_")[0] for row in combined]
    uni_names = unique(names)

    # NOTE(review): the filters below use a fixed |log2fc| > 1 and always
    # "statistic < pval", regardless of the ``log2fc`` and ``stat``
    # arguments. Kept as is -- confirm whether this is intended.
    allowed = set(uni_names)
    diff_non_templated = select(non_templated, 1, below_pval, allowed)
    diff_matures = select(matures, 1, below_pval, allowed)
    diff_isoforms = select(isoforms, 1, below_pval, allowed)

    diff_matures.sort(key=by_abs_fold, reverse=True)
    diff_isoforms.sort(key=by_abs_fold, reverse=True)
    diff_non_templated.sort(key=by_abs_fold, reverse=True)

    return diff_matures, diff_isoforms, diff_non_templated, uni_names, non_temp, mat_iso
229
230 ################################################################################################################################################################################################################################################>
231