Mercurial > repos > glogobyte > viztool
comparison viz_functions.py @ 26:6528239cedf0 draft
Uploaded
author | glogobyte |
---|---|
date | Sun, 05 Dec 2021 13:29:46 +0000 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
25:a85854b520c9 | 26:6528239cedf0 |
---|---|
1 import pandas as pd | |
2 import matplotlib.patches as mpatches | |
3 import matplotlib.font_manager as font_manager | |
4 import matplotlib.pyplot as plt | |
5 import sys | |
6 | |
7 ######################################################################################### | |
8 | |
9 # Read a file and return it as a list | |
def read(path, flag):
    """Read a text file and return its content as a list of lines.

    Args:
        path: Path of the file to read.
        flag: Selects how lines are returned.
            ``0`` -- ``readlines()`` output, line endings kept.
            ``1`` -- ``read().splitlines()`` output, line endings removed.

    Returns:
        A list of strings for ``flag`` 0 or 1; ``None`` for any other flag
        (in which case the file is not opened at all).
    """
    # The ``with`` statement already closes the file, so no explicit
    # ``close()`` call is needed afterwards.
    if flag == 0:
        with open(path) as fp:
            return fp.readlines()

    if flag == 1:
        with open(path) as fp:
            return fp.read().splitlines()

    return None
22 | |
23 # Write a list to a txt file | |
def write(path, list):
    """Write the inner fields of every row to ``path``.

    For each row in ``list`` the first and last elements are dropped
    (``row[1:-1]``) and the remaining strings are joined with tabs.
    The joined rows are written back to back; no separator or newline is
    inserted between them. The file is created or truncated.

    Args:
        path: Destination file path.
        list: Iterable of sequences of strings (name kept for
            compatibility with existing callers).
    """
    with open(path, 'w') as out:
        out.writelines("\t".join(row[1:-1]) for row in list)
29 | |
30 | |
31 ################################################################################################################################################################> | |
32 | |
def top_diff(miRNA_info, number, flag, l):
    """Draw a horizontal bar chart of the entries with the largest |Log2FC|.

    The ``number`` entries with the largest absolute value at index 1 are
    kept, ordered by the value at index 0 and plotted as bars: green for
    positive values, red for negative ones, black for everything else.

    Args:
        miRNA_info: Sequence of entries where ``entry[0]`` is the label and
            ``entry[1]`` the numeric Log2FC. It is not modified.
        number: Maximum number of entries to plot.
        flag: ``'t'`` saves the chart as ``tem.png``, ``'nt'`` saves it as
            ``non.png``; any other value saves nothing.
        l: Unused; kept so existing callers keep working.

    Returns:
        None. Nothing is drawn or saved when no entries are selected.
    """
    # sorted() rather than list.sort(): the caller's list must not be
    # reordered as a side effect of plotting.
    ranked = sorted(miRNA_info, key=lambda entry: abs(entry[1]), reverse=True)
    selected = sorted(ranked[:number], key=lambda entry: entry[0])

    kinds = []
    for entry in selected:
        if entry[1] > 0:
            kinds.append(True)
        elif entry[1] < 0:
            kinds.append(False)
        else:
            kinds.append("Zero")

    df_miRNA = pd.DataFrame(data={
        "Names": [entry[0] for entry in selected],
        "Log2FC": [entry[1] for entry in selected],
        "Kind": kinds,
    })
    df_miRNA = df_miRNA.sort_values(by=['Names'])

    if df_miRNA.empty:
        return

    # NOTE(review): the reviewed copy of this function had lost its
    # indentation; the grid and savefig calls below are assumed to belong to
    # the non-empty branch -- confirm against the repository.
    bar_colors = df_miRNA.Kind.map({True: 'g', False: 'r', 'Zero': 'k'})
    h1 = df_miRNA.plot.barh(x='Names', y='Log2FC', color=bar_colors)
    plt.gcf().set_size_inches(5, 12)  # tall, narrow figure (width, height)

    up_reg = mpatches.Patch(color='green', label='Upregulated')
    down_reg = mpatches.Patch(color='red', label='Downregulated')
    plt.legend(handles=[up_reg, down_reg], bbox_to_anchor=(1.04, 0.5),
               loc="center left", borderaxespad=0)
    h1.set_ylabel(" ", fontsize=3, fontweight='bold')
    h1.set_xlabel("Log2FC", fontsize=12, fontweight='bold')
    plt.axvline(x=0, color="k")

    plt.grid(axis='y', linewidth=0.2)
    plt.grid(axis='x', linewidth=0.2)
    if flag == 't':
        plt.savefig('tem.png', bbox_inches='tight', dpi=300)
    if flag == 'nt':
        plt.savefig('non.png', bbox_inches='tight', dpi=300)
73 | |
74 | |
75 ################################################################################################################################################################> | |
76 | |
def unique(sequence):
    """Return the items of ``sequence`` without duplicates, in first-seen order.

    Items must be hashable because a set tracks what was already emitted.
    """
    already_seen = set()
    ordered = []
    for item in sequence:
        if item in already_seen:
            continue
        already_seen.add(item)
        ordered.append(item)
    return ordered
80 | |
81 ################################################################################################################################################################> | |
82 | |
def top_scatter_non(matures, isoforms, non_temp, uni_names, number):
    """Scatter-plot Log2FC values for the first ``number`` matching names.

    ``uni_names`` is walked in order. For every name, each entry of
    ``matures``, ``isoforms`` and ``non_temp`` whose ``entry[0]`` contains
    the name contributes one point (``entry[1]`` on the x axis, the name on
    the y axis). A name counts towards ``number`` only if it matched at
    least one entry. The figure is saved as ``a2.png``.

    Args:
        matures: Entries plotted in green, labelled "RefSeq miRNA".
        isoforms: Entries plotted in red, labelled "Templated isomiR".
        non_temp: Entries plotted in orange, labelled "Non-templated isomiR".
        uni_names: Names to look up, in plotting priority order.
        number: Maximum number of names that may contribute points.

    Returns:
        None.
    """
    mat_names, mat_log2fc = [], []
    iso_names, iso_log2fc = [], []
    non_temp_names, non_temp_log2fc = [], []

    groups = (
        (matures, mat_names, mat_log2fc),
        (isoforms, iso_names, iso_log2fc),
        (non_temp, non_temp_names, non_temp_log2fc),
    )

    count = 0
    for name in uni_names:
        if count >= number:
            break  # limit reached; remaining names can never contribute
        matched = False
        for entries, names_out, log2fc_out in groups:
            for entry in entries:
                # NOTE(review): this is a containment test, so for string
                # labels a name also matches longer labels that merely
                # contain it -- confirm whether an exact match on the part
                # before the first "_" was intended.
                if name in entry[0]:
                    log2fc_out.append(entry[1])
                    names_out.append(name)
                    matched = True
        if matched:
            count += 1

    mat_df = pd.DataFrame(dict(names=mat_names, log2fc=mat_log2fc))
    iso_df = pd.DataFrame(dict(names=iso_names, log2fc=iso_log2fc))
    non_df = pd.DataFrame(dict(names=non_temp_names, log2fc=non_temp_log2fc))

    # NOTE(review): earlier revisions called ``sort_values(by=['names'])`` on
    # the three frames here but discarded the results, so the frames were
    # never sorted. The calls were removed; rows stay in ``uni_names`` order.
    # Assign the sorted frames back if alphabetical order is wanted.

    fig, ax = plt.subplots()

    # Templated isomiRs are drawn first, then RefSeq miRNAs, then
    # non-templated isomiRs.
    h3 = ax.scatter(iso_df['log2fc'], iso_df['names'], edgecolors='k',
                    linewidth=1, marker='o', c='red', alpha=0.4)
    h1 = ax.scatter(mat_df['log2fc'], mat_df['names'], edgecolors='k',
                    linewidth=1, marker='o', c='green', alpha=0.4)
    h2 = ax.scatter(non_df['log2fc'], non_df['names'], edgecolors='k',
                    linewidth=1, marker='o', c='orange', alpha=0.4)

    plt.legend([h1, h2, h3],
               ["RefSeq miRNA", "Non-templated isomiR", "Templated isomiR"],
               bbox_to_anchor=(1.04, 0.5), loc="center left", borderaxespad=0)
    plt.axvline(x=0, color="k")
    plt.grid(axis='y', linewidth=0.2)
    plt.grid(axis='x', linewidth=0.2)
    plt.xlabel("Log2FC", fontsize=12, fontweight='bold')
    plt.yticks(rotation=0, ha="right", fontsize=10)
    plt.xticks(rotation=0, ha="right", fontsize=10)
    plt.tight_layout()
    fig.set_size_inches(16, 12)  # (width, height) in inches
    plt.savefig('a2.png', bbox_inches='tight', dpi=300)
141 | |
142 ######################################################################################################################################################################################################################################### | |
143 | |
def top_scatter_tem(matures, isoforms, uni_names, number):
    """Scatter-plot Log2FC values for the first ``number`` matching names.

    ``uni_names`` is walked in order. For every name, each entry of
    ``matures`` and ``isoforms`` whose ``entry[0]`` contains the name
    contributes one point (``entry[1]`` on the x axis, the name on the
    y axis). A name counts towards ``number`` only if it matched at least
    one entry. The figure is saved as ``a2.png``.

    Args:
        matures: Entries plotted in green, labelled "RefSeq miRNA".
        isoforms: Entries plotted in red, labelled "Templated isomiR".
        uni_names: Names to look up, in plotting priority order.
        number: Maximum number of names that may contribute points.

    Returns:
        None.
    """
    mat_names, mat_log2fc = [], []
    iso_names, iso_log2fc = [], []

    groups = (
        (matures, mat_names, mat_log2fc),
        (isoforms, iso_names, iso_log2fc),
    )

    count = 0
    for name in uni_names:
        if count >= number:
            break  # limit reached; remaining names can never contribute
        matched = False
        for entries, names_out, log2fc_out in groups:
            for entry in entries:
                # NOTE(review): this is a containment test, so for string
                # labels a name also matches longer labels that merely
                # contain it -- confirm whether an exact match on the part
                # before the first "_" was intended.
                if name in entry[0]:
                    log2fc_out.append(entry[1])
                    names_out.append(name)
                    matched = True
        if matched:
            count += 1

    mat_df = pd.DataFrame(dict(names=mat_names, log2fc=mat_log2fc))
    iso_df = pd.DataFrame(dict(names=iso_names, log2fc=iso_log2fc))

    # NOTE(review): earlier revisions called ``sort_values(by=['names'])`` on
    # both frames here but discarded the results, so the frames were never
    # sorted. The calls were removed; rows stay in ``uni_names`` order.
    # Assign the sorted frames back if alphabetical order is wanted.

    fig, ax = plt.subplots()

    # Templated isomiRs are drawn first, then RefSeq miRNAs.
    h3 = ax.scatter(iso_df['log2fc'], iso_df['names'], edgecolors='k',
                    linewidth=1, marker='o', c='red', alpha=0.4)
    h1 = ax.scatter(mat_df['log2fc'], mat_df['names'], edgecolors='k',
                    linewidth=1, marker='o', c='green', alpha=0.4)

    plt.legend([h1, h3], ["RefSeq miRNA", "Templated isomiR"],
               bbox_to_anchor=(1.04, 0.5), loc="center left", borderaxespad=0)
    plt.axvline(x=0, color="k")
    plt.grid(axis='y', linewidth=0.2)
    plt.grid(axis='x', linewidth=0.2)
    plt.xlabel("Log2FC", fontsize=12, fontweight='bold')
    plt.yticks(rotation=0, ha="right", fontsize=10)
    plt.xticks(rotation=0, ha="right", fontsize=10)
    plt.tight_layout()
    fig.set_size_inches(16, 12)  # (width, height) in inches
    plt.savefig('a2.png', bbox_inches='tight', dpi=300)
191 | |
192 | |
193 ############################################################################################################################################################################################################################################## | |
194 | |
def _select_entries(rows, min_abs_log2fc, p_value_ok, allowed_names=None):
    """Return fresh ``[name, log2fc, p_value]`` lists for rows passing all filters."""
    kept = []
    for row in rows:
        fold_change = float(row[1])
        if not abs(fold_change) > min_abs_log2fc:
            continue
        # The third column is parsed only after the fold-change test passes,
        # so an unparsable value on an already rejected row never raises.
        p_value = float(row[2])
        if not p_value_ok(p_value):
            continue
        if allowed_names is not None and row[0].split("_")[0] not in allowed_names:
            continue
        kept.append([row[0], fold_change, p_value])
    return kept


def preproccess(non_templated, matures, isoforms, log2fc, pval, stat):
    """Filter and rank the three entry tables by fold change and p-value.

    Every input row is indexable as ``row[0]`` (label), ``row[1]`` (Log2FC)
    and ``row[2]`` (p-value); the last two are converted with ``float``.

    Args:
        non_templated: Rows of non-templated entries.
        matures: Rows of mature entries.
        isoforms: Rows of isoform entries.
        log2fc: A row is kept only if ``abs(Log2FC) > log2fc``.
        pval: P-value threshold (see ``stat``).
        stat: When equal to the string ``"3"`` rows need ``p-value > pval``;
            for any other value they need ``p-value < pval``.

    Returns:
        A tuple ``(diff_matures, diff_isoforms, diff_non_templated,
        uni_names, non_temp, mat_iso)`` where

        * ``non_temp`` holds the filtered non-templated rows, sorted by
          descending ``abs(Log2FC)``;
        * ``mat_iso`` holds the filtered mature rows followed by the filtered
          isoform rows, in input order;
        * ``uni_names`` holds the label prefixes (text before the first
          ``"_"``) of all filtered rows, ranked by descending
          ``abs(Log2FC)``, duplicates removed;
        * the ``diff_*`` lists hold the rows of each input whose prefix is in
          ``uni_names`` and that pass ``abs(Log2FC) > 1`` and
          ``p-value < pval``, sorted by descending ``abs(Log2FC)``.

    Raises:
        SystemExit: If no row of any input passes the first filter.
    """
    if stat == "3":
        def p_value_ok(p_value):
            return p_value > pval
    else:
        def p_value_ok(p_value):
            return p_value < pval

    def below_pval(p_value):
        return p_value < pval

    def magnitude(entry):
        return abs(entry[1])

    non_temp = _select_entries(non_templated, log2fc, p_value_ok)
    mat = _select_entries(matures, log2fc, p_value_ok)
    iso = _select_entries(isoforms, log2fc, p_value_ok)

    # Built before any sorting so it keeps the input order of its rows.
    mat_iso = mat + iso

    if not non_temp and not mat and not iso:
        sys.exit("There aren't entries which meet these criteria")

    non_temp.sort(key=magnitude, reverse=True)

    # The sort is stable, so ranking the concatenation directly gives the
    # same order as sorting ``mat`` and ``iso`` individually first.
    ranked = sorted(mat + iso + non_temp, key=magnitude, reverse=True)
    # dict.fromkeys keeps first-seen order while dropping duplicates.
    uni_names = list(dict.fromkeys(entry[0].split("_")[0] for entry in ranked))
    allowed_names = set(uni_names)  # O(1) membership for the filters below

    # NOTE(review): the thresholds below are a hard-coded ``1`` and
    # ``< pval`` regardless of ``log2fc`` and ``stat`` -- kept as in the
    # previous revision; confirm whether the parameters should be used.
    diff_non_templated = _select_entries(non_templated, 1, below_pval, allowed_names)
    diff_matures = _select_entries(matures, 1, below_pval, allowed_names)
    diff_isoforms = _select_entries(isoforms, 1, below_pval, allowed_names)

    diff_matures.sort(key=magnitude, reverse=True)
    diff_isoforms.sort(key=magnitude, reverse=True)
    diff_non_templated.sort(key=magnitude, reverse=True)

    return diff_matures, diff_isoforms, diff_non_templated, uni_names, non_temp, mat_iso
229 | |
230 ################################################################################################################################################################################################################################################> | |
231 |