Mercurial > repos > glogobyte > viztool
comparison viz_functions.py @ 26:6528239cedf0 draft
Uploaded
| author | glogobyte |
|---|---|
| date | Sun, 05 Dec 2021 13:29:46 +0000 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| 25:a85854b520c9 | 26:6528239cedf0 |
|---|---|
| 1 import pandas as pd | |
| 2 import matplotlib.patches as mpatches | |
| 3 import matplotlib.font_manager as font_manager | |
| 4 import matplotlib.pyplot as plt | |
| 5 import sys | |
| 6 | |
| 7 ######################################################################################### | |
| 8 | |
| 9 # Read a file and return it as a list | |
def read(path, flag):
    """Read a text file and return its lines as a list.

    Args:
        path: Path of the file to open (default text mode and encoding).
        flag: ``0`` returns the result of ``readlines()`` (line endings
            kept); ``1`` returns ``read().splitlines()`` (line endings
            stripped).

    Returns:
        A list of strings for flag ``0`` or ``1``; ``None`` for any other
        flag value, in which case the file is not opened at all.
    """
    # The ``with`` block already closes the file, so no explicit close().
    if flag == 0:
        with open(path) as fp:
            return fp.readlines()

    if flag == 1:
        with open(path) as fp:
            return fp.read().splitlines()

    return None
| 22 | |
| 23 # Write a list to a txt file | |
def write(path, list):
    """Write the inner fields of every item in ``list`` to a text file.

    For each item ``x`` the slice ``x[1:-1]`` (everything except the first
    and last element) is joined with tab characters and written to ``path``.
    The file is opened in ``'w'`` mode, so existing content is replaced.

    Args:
        path: Destination file path.
        list: Iterable of sliceable items whose inner elements are strings.
            The parameter name shadows the builtin but is kept because it is
            part of the function's interface.
    """
    # NOTE(review): no separator or newline is written between items, so
    # consecutive items run together in the output -- confirm this is intended.
    with open(path, 'w') as fp:
        fp.writelines("\t".join(x[1:-1]) for x in list)
| 29 | |
| 30 | |
| 31 ################################################################################################################################################################> | |
| 32 | |
def top_diff(miRNA_info, number,flag,l):
    """Draw a horizontal bar chart of the entries with the largest |Log2FC|.

    The ``number`` entries with the largest absolute value at index 1 are
    kept, ordered by the value at index 0, and plotted as horizontal bars:
    green for positive values, red for negative values, black otherwise.

    Args:
        miRNA_info: List of entries where index 0 is the label shown on the
            y axis and index 1 is the numeric Log2FC. The list is not
            modified.
        number: Maximum number of entries to plot.
        flag: ``'t'`` saves the figure as ``tem.png``; ``'nt'`` saves it as
            ``non.png``; any other value draws the figure without saving it.
        l: Unused; kept so existing callers keep working.

    Nothing is drawn or saved when there are no entries to plot.
    """
    # sorted() works on a copy so the caller's list keeps its order.
    top_entries = sorted(miRNA_info, key=lambda x: abs(x[1]), reverse=True)[:number]
    top_entries.sort(key=lambda x: x[0])

    # True -> positive, False -> negative, "Zero" -> anything else.
    Kind = []
    for entry in top_entries:
        if entry[1] > 0:
            Kind.append(True)
        elif entry[1] < 0:
            Kind.append(False)
        else:
            Kind.append("Zero")

    df_miRNA = pd.DataFrame(data={
        "Names": [entry[0] for entry in top_entries],
        "Log2FC": [entry[1] for entry in top_entries],
        "Kind": Kind,
    })
    df_miRNA = df_miRNA.sort_values(by=['Names'])
    if df_miRNA.empty:
        return

    bar_colours = df_miRNA.Kind.map({True: 'g', False: 'r', 'Zero':'k'})
    h1 = df_miRNA.plot.barh(x='Names', y='Log2FC', color=bar_colours)
    plt.gcf().set_size_inches(5, 12)

    up_reg = mpatches.Patch(color='green', label='Upregulated')
    down_reg = mpatches.Patch(color='red', label='Downregulated')
    plt.legend(handles=[up_reg,down_reg],bbox_to_anchor=(1.04,0.5), loc="center left", borderaxespad=0)
    h1.set_ylabel(" ", fontsize=3, fontweight='bold')
    h1.set_xlabel("Log2FC", fontsize=12, fontweight='bold')
    plt.axvline(x=0, color="k")

    plt.grid(axis='y', linewidth=0.2)
    plt.grid(axis='x', linewidth=0.2)
    if flag=='t':
        plt.savefig('tem.png', bbox_inches='tight', dpi=300)
    if flag=='nt':
        plt.savefig('non.png', bbox_inches='tight', dpi=300)
| 73 | |
| 74 | |
| 75 ################################################################################################################################################################> | |
| 76 | |
def unique(sequence):
    """Return the distinct items of ``sequence`` in first-seen order.

    Args:
        sequence: Iterable of hashable items.

    Returns:
        A new list holding each distinct item once, ordered by its first
        occurrence in ``sequence``.
    """
    # dict keys are unique and keep insertion order, which replaces the
    # side-effecting ``seen.add`` trick inside a comprehension condition.
    return list(dict.fromkeys(sequence))
| 80 | |
| 81 ################################################################################################################################################################> | |
| 82 | |
def top_scatter_non(matures,isoforms,non_temp,uni_names,number):
    """Scatter-plot Log2FC values for the first ``number`` matching names.

    ``uni_names`` is walked in order. A name counts towards ``number`` when
    at least one entry of ``matures``, ``isoforms`` or ``non_temp`` belongs
    to it, and every such entry contributes one point. The figure is saved
    as ``a2.png``.

    An entry belongs to a name when the part of ``entry[0]`` before the
    first ``"_"`` equals that name. The previous substring test
    (``name in entry[0]``) also attached entries whose identifier merely
    contained the name.

    Args:
        matures: Entries plotted in green and labelled "RefSeq miRNA";
            index 0 is the identifier string, index 1 the Log2FC.
        isoforms: Entries plotted in red and labelled "Templated isomiR".
        non_temp: Entries plotted in orange and labelled
            "Non-templated isomiR".
        uni_names: Ordered base names to look up.
        number: Maximum number of names that may contribute points.
    """
    def index_by_base_name(entries):
        # Map each base name to its Log2FC values, keeping entry order.
        index = {}
        for entry in entries:
            index.setdefault(entry[0].split("_")[0], []).append(entry[1])
        return index

    # Positions: 0 = matures, 1 = isoforms, 2 = non-templated.
    indexes = [index_by_base_name(group) for group in (matures, isoforms, non_temp)]
    point_names = [[], [], []]
    point_log2fc = [[], [], []]

    matched = 0
    for name in uni_names:
        if matched >= number:
            break
        found = False
        for position, index in enumerate(indexes):
            hits = index.get(name)
            if hits:
                point_names[position].extend([name] * len(hits))
                point_log2fc[position].extend(hits)
                found = True
        if found:
            matched += 1

    mat_df = pd.DataFrame(dict(names=point_names[0], log2fc=point_log2fc[0]))
    iso_df = pd.DataFrame(dict(names=point_names[1], log2fc=point_log2fc[1]))
    non_df = pd.DataFrame(dict(names=point_names[2], log2fc=point_log2fc[2]))

    # The frames are deliberately left in lookup order: the earlier
    # sort_values() calls discarded their result and never reordered anything.

    fig, ax = plt.subplots()

    h3=ax.scatter(iso_df['log2fc'],iso_df['names'],edgecolors='k',linewidth=1, marker='o', c='red',alpha=0.4)
    h1=ax.scatter(mat_df['log2fc'],mat_df['names'],edgecolors='k',linewidth=1, marker='o', c='green',alpha=0.4)
    h2=ax.scatter(non_df['log2fc'],non_df['names'],edgecolors='k',linewidth=1, marker='o', c='orange',alpha=0.4)

    plt.legend([h1,h2,h3],["RefSeq miRNA","Non-templated isomiR","Templated isomiR"],bbox_to_anchor=(1.04,0.5), loc="center left", borderaxespad=0)
    plt.axvline(x=0, color="k")
    plt.grid(axis='y', linewidth=0.2)
    plt.grid(axis='x', linewidth=0.2)
    plt.xlabel("Log2FC", fontsize=12, fontweight='bold')
    plt.yticks(rotation=0,ha="right", fontsize=10)
    plt.xticks(rotation=0,ha="right", fontsize=10)
    plt.tight_layout()
    # ``fig`` is the figure just created above, i.e. the current figure.
    fig.set_size_inches(16, 12)
    plt.savefig('a2.png', bbox_inches='tight', dpi=300)
| 141 | |
| 142 ######################################################################################################################################################################################################################################### | |
| 143 | |
def top_scatter_tem(matures,isoforms,uni_names,number):
    """Scatter-plot Log2FC values for the first ``number`` matching names.

    ``uni_names`` is walked in order. A name counts towards ``number`` when
    at least one entry of ``matures`` or ``isoforms`` belongs to it, and
    every such entry contributes one point. The figure is saved as
    ``a2.png``.

    An entry belongs to a name when the part of ``entry[0]`` before the
    first ``"_"`` equals that name. The previous substring test
    (``name in entry[0]``) also attached entries whose identifier merely
    contained the name.

    Args:
        matures: Entries plotted in green and labelled "RefSeq miRNA";
            index 0 is the identifier string, index 1 the Log2FC.
        isoforms: Entries plotted in red and labelled "Templated isomiR".
        uni_names: Ordered base names to look up.
        number: Maximum number of names that may contribute points.
    """
    def index_by_base_name(entries):
        # Map each base name to its Log2FC values, keeping entry order.
        index = {}
        for entry in entries:
            index.setdefault(entry[0].split("_")[0], []).append(entry[1])
        return index

    # Positions: 0 = matures, 1 = isoforms.
    indexes = [index_by_base_name(group) for group in (matures, isoforms)]
    point_names = [[], []]
    point_log2fc = [[], []]

    matched = 0
    for name in uni_names:
        if matched >= number:
            break
        found = False
        for position, index in enumerate(indexes):
            hits = index.get(name)
            if hits:
                point_names[position].extend([name] * len(hits))
                point_log2fc[position].extend(hits)
                found = True
        if found:
            matched += 1

    mat_df = pd.DataFrame(dict(names=point_names[0], log2fc=point_log2fc[0]))
    iso_df = pd.DataFrame(dict(names=point_names[1], log2fc=point_log2fc[1]))

    # The frames are deliberately left in lookup order: the earlier
    # sort_values() calls discarded their result and never reordered anything.

    fig, ax = plt.subplots()

    h3=ax.scatter(iso_df['log2fc'],iso_df['names'],edgecolors='k',linewidth=1, marker='o', c='red',alpha=0.4)
    h1=ax.scatter(mat_df['log2fc'],mat_df['names'],edgecolors='k',linewidth=1, marker='o', c='green',alpha=0.4)

    plt.legend([h1,h3],["RefSeq miRNA","Templated isomiR"],bbox_to_anchor=(1.04,0.5), loc="center left", borderaxespad=0)
    plt.axvline(x=0, color="k")
    plt.grid(axis='y', linewidth=0.2)
    plt.grid(axis='x', linewidth=0.2)
    plt.xlabel("Log2FC", fontsize=12, fontweight='bold')
    plt.yticks(rotation=0,ha="right", fontsize=10)
    plt.xticks(rotation=0,ha="right", fontsize=10)
    plt.tight_layout()
    # ``fig`` is the figure just created above, i.e. the current figure.
    fig.set_size_inches(16, 12)
    plt.savefig('a2.png', bbox_inches='tight', dpi=300)
| 191 | |
| 192 | |
| 193 ############################################################################################################################################################################################################################################## | |
| 194 | |
def preproccess(non_templated,matures,isoforms,log2fc,pval,stat):
    """Filter and rank the three entry tables.

    Every input row is indexable: index 0 is an identifier string, index 1
    a Log2FC value and index 2 a statistic; indices 1 and 2 must be
    parseable by ``float()`` whenever they are inspected. Index 2 is only
    parsed for rows that already passed the fold-change test.

    Args:
        non_templated: Rows of the non-templated table.
        matures: Rows of the mature table.
        isoforms: Rows of the isoform table.
        log2fc: Rows need ``abs(Log2FC) > log2fc`` in the first pass.
        pval: Threshold applied to the statistic at index 2.
        stat: When equal to ``"3"`` the first pass keeps rows whose
            statistic is greater than ``pval``; otherwise rows whose
            statistic is less than ``pval``.

    Returns:
        A 6-tuple ``(diff_matures, diff_isoforms, diff_non_templated,
        uni_names, non_temp, mat_iso)``:

        * ``diff_*``: second-pass selections (see the note in the body),
          sorted by descending ``abs(Log2FC)``.
        * ``uni_names``: distinct identifier prefixes (text before the first
          ``"_"``) of all first-pass rows, ordered by descending
          ``abs(Log2FC)`` of their first occurrence.
        * ``non_temp``: first-pass non-templated rows, sorted by descending
          ``abs(Log2FC)``.
        * ``mat_iso``: first-pass mature rows followed by first-pass isoform
          rows, each in input order.

        Every returned row is a fresh ``[identifier, float, float]`` list.

    Raises:
        SystemExit: When no row of any table passes the first pass.
    """
    def magnitude(entry):
        return abs(entry[1])

    def below_pval(value):
        return value < pval

    def above_pval(value):
        return value > pval

    def select(rows, min_abs_fc, stat_ok, allowed=None):
        # Checks run in a fixed order so index 2 is parsed only after the
        # fold-change test passed, and the prefix is computed last.
        kept = []
        for row in rows:
            fold = float(row[1])
            if not abs(fold) > min_abs_fc:
                continue
            score = float(row[2])
            if not stat_ok(score):
                continue
            if allowed is not None and row[0].split("_")[0] not in allowed:
                continue
            kept.append([row[0], fold, score])
        return kept

    first_pass_ok = above_pval if stat == "3" else below_pval
    non_temp = select(non_templated, log2fc, first_pass_ok)
    mat = select(matures, log2fc, first_pass_ok)
    iso = select(isoforms, log2fc, first_pass_ok)

    # Concatenated before the in-place sorts below, so it keeps input order.
    mat_iso = mat + iso

    if not (non_temp or mat or iso):
        sys.exit("There aren't entries which meet these criteria")

    mat.sort(key=magnitude, reverse=True)
    iso.sort(key=magnitude, reverse=True)
    non_temp.sort(key=magnitude, reverse=True)

    ranked = mat + iso + non_temp
    ranked.sort(key=magnitude, reverse=True)
    # dict keys de-duplicate while keeping first-seen order.
    uni_names = list(dict.fromkeys(entry[0].split("_")[0] for entry in ranked))
    allowed = set(uni_names)

    # NOTE(review): the second pass always uses a fixed fold-change cut-off
    # of 1 and "statistic < pval", regardless of ``log2fc`` and ``stat`` --
    # confirm this is intended.
    diff_non_templated = select(non_templated, 1, below_pval, allowed)
    diff_matures = select(matures, 1, below_pval, allowed)
    diff_isoforms = select(isoforms, 1, below_pval, allowed)

    diff_matures.sort(key=magnitude, reverse=True)
    diff_isoforms.sort(key=magnitude, reverse=True)
    diff_non_templated.sort(key=magnitude, reverse=True)

    return diff_matures,diff_isoforms,diff_non_templated,uni_names,non_temp,mat_iso
| 229 | |
| 230 ################################################################################################################################################################################################################################################> | |
| 231 |
