Mercurial > repos > glogobyte > viztool
comparison viz_ultra.py @ 13:41de387b3982 draft
Uploaded
| author | glogobyte |
|---|---|
| date | Wed, 28 Oct 2020 07:32:14 +0000 |
| parents | 2c5723e2421a |
| children |
comparison
equal
deleted
inserted
replaced
| 12:3038dac9d61c | 13:41de387b3982 |
|---|---|
| 1 import argparse | |
| 2 from functions import * | |
| 3 from viz_graphs import * | |
| 4 import sys | |
| 5 import pandas as pd | |
| 6 import matplotlib.pyplot as plt | |
| 7 import matplotlib.patches as mpatches | |
| 8 import matplotlib.font_manager as font_manager | |
| 9 import time | |
| 10 from multiprocessing import Process, Queue, Lock, Pool, Manager, Value | |
| 11 | |
| 12 | |
| 13 ################################################################################################################################################################################################################## | |
| 14 | |
def top_diff(miRNA_info, number, flag, l=None):
    """Draw a horizontal bar chart of the entries with the largest |Log2FC|.

    Parameters
    ----------
    miRNA_info : list
        Records whose item 0 is the name and item 1 the numeric Log2FC
        (only ``x[0]`` and ``x[1]`` are read).  The list is not modified.
    number : int
        How many of the highest-|Log2FC| records are kept for the chart.
    flag : str
        ``'t'`` saves the figure as ``tem.png``, ``'nt'`` saves it as
        ``non.png``; any other value draws the chart without saving it.
    l : object, optional
        Never used by this function.  It is kept (now with a default) so
        callers that pass a lock and callers that pass only three
        arguments both work.

    Returns
    -------
    None
        Nothing is drawn or saved when no record is selected.
    """
    # sorted() rather than list.sort(): the caller's list keeps its order.
    ranked = sorted(miRNA_info, key=lambda rec: abs(rec[1]), reverse=True)
    top = sorted(ranked[:number], key=lambda rec: rec[0])

    # NOTE(review): the indentation of the original was lost; the drawing and
    # saving steps are assumed to run only when there is something to plot.
    if not top:
        return

    def _direction(value):
        # True -> positive, False -> negative, "Zero" -> anything else.
        if value > 0:
            return True
        if value < 0:
            return False
        return "Zero"

    df_miRNA = pd.DataFrame(data={
        "Names": [rec[0] for rec in top],
        "Log2FC": [rec[1] for rec in top],
        "Kind": [_direction(rec[1]) for rec in top],
    })
    df_miRNA = df_miRNA.sort_values(by=['Names'])

    bar_colors = df_miRNA.Kind.map({True: 'g', False: 'r', 'Zero': 'k'})
    axes = df_miRNA.plot.barh(x='Names', y='Log2FC', color=bar_colors)

    figure = plt.gcf()
    figure.set_size_inches(5, 12)

    up_reg = mpatches.Patch(color='green', label='Upregulated')
    down_reg = mpatches.Patch(color='red', label='Downregulated')
    # Legend is anchored outside the axes, to the right of the plot area.
    plt.legend(handles=[up_reg, down_reg], bbox_to_anchor=(1.04, 0.5),
               loc="center left", borderaxespad=0)

    axes.set_ylabel(" ", fontsize=3, fontweight='bold')
    axes.set_xlabel("Log2FC", fontsize=12, fontweight='bold')
    plt.axvline(x=0, color="k")

    plt.grid(axis='y', linewidth=0.2)
    plt.grid(axis='x', linewidth=0.2)

    if flag == 't':
        plt.savefig('tem.png', bbox_inches='tight', dpi=300)
    if flag == 'nt':
        plt.savefig('non.png', bbox_inches='tight', dpi=300)
| 55 | |
| 56 #################################################################################################################################################################################################################### | |
| 57 | |
def unique(sequence):
    """Return the items of *sequence* without duplicates, in first-seen order.

    Items must be hashable.  The result is always a new list.
    """
    # Dict keys are unique and keep insertion order, so this drops repeats
    # while preserving the position of each item's first occurrence.
    return list(dict.fromkeys(sequence))
| 61 | |
| 62 ########################################################################################################################################################################################################################################################################### | |
| 63 | |
def top_scatter_non(matures, isoforms, non_temp, uni_names, number):
    """Scatter-plot Log2FC per name for three record groups and save ``a2.png``.

    ``uni_names`` is walked in order; for the first ``number`` names that
    have at least one record, every record of that name from ``matures``,
    ``isoforms`` and ``non_temp`` is plotted (x = Log2FC, y = name).

    Parameters
    ----------
    matures, isoforms, non_temp : list
        Records whose item 0 is the full entry name and item 1 the Log2FC.
    uni_names : list
        Names to look for.  A record belongs to a name when the part of
        its entry name before the first ``"_"`` equals that name.
    number : int
        Maximum number of names (with at least one record) to plot.

    Notes
    -----
    Records used to be matched with a substring test (``name in entry``),
    which also picked up entries of any other name containing the searched
    one.  Matching is now exact on the prefix before the first ``"_"``.
    """
    def _group_by_name(records):
        # name prefix -> Log2FC values, in the records' original order.
        grouped = {}
        for rec in records:
            grouped.setdefault(rec[0].split("_")[0], []).append(rec[1])
        return grouped

    sources = (_group_by_name(matures),
               _group_by_name(isoforms),
               _group_by_name(non_temp))
    # One (names, log2fc) pair of parallel lists per source.
    picked = tuple(([], []) for _ in sources)

    shown = 0
    for name in uni_names:
        if shown >= number:
            break
        found = False
        for grouped, (names, values) in zip(sources, picked):
            hits = grouped.get(name, ())
            if hits:
                names.extend([name] * len(hits))
                values.extend(hits)
                found = True
        if found:
            shown += 1

    (mat_names, mat_log2fc), (iso_names, iso_log2fc), (non_names, non_log2fc) = picked
    mat_df = pd.DataFrame(dict(names=mat_names, log2fc=mat_log2fc))
    iso_df = pd.DataFrame(dict(names=iso_names, log2fc=iso_log2fc))
    non_df = pd.DataFrame(dict(names=non_names, log2fc=non_log2fc))

    # The original called sort_values() on the frames but discarded the
    # result, so it never had any effect; the rows stay in uni_names order.

    _, ax = plt.subplots()

    # Drawing order is kept (isoforms, then matures, then non-templated).
    h3 = ax.scatter(iso_df['log2fc'], iso_df['names'], edgecolors='k', linewidth=1, marker='o', c='red')
    h1 = ax.scatter(mat_df['log2fc'], mat_df['names'], edgecolors='k', linewidth=1, marker='o', c='green')
    h2 = ax.scatter(non_df['log2fc'], non_df['names'], edgecolors='k', linewidth=1, marker='o', c='blue')

    plt.legend([h1, h2, h3], ["Reference miRNA", "Non-template", "Template isomiRs"],
               bbox_to_anchor=(1.04, 0.5), loc="center left", borderaxespad=0)
    plt.axvline(x=0, color="k")
    plt.grid(axis='y', linewidth=0.2)
    plt.grid(axis='x', linewidth=0.2)
    plt.xlabel("Log2FC", fontsize=12, fontweight='bold')
    plt.yticks(rotation=0, ha="right", fontsize=10)
    plt.xticks(rotation=0, ha="right", fontsize=10)
    plt.tight_layout()
    figure = plt.gcf()
    figure.set_size_inches(16, 12)
    plt.savefig('a2.png', bbox_inches='tight', dpi=300)
| 122 | |
| 123 ######################################################################################################################################################################################################################################### | |
def top_scatter_tem(matures, isoforms, uni_names, number):
    """Scatter-plot Log2FC per name for two record groups and save ``a2.png``.

    ``uni_names`` is walked in order; for the first ``number`` names that
    have at least one record, every record of that name from ``matures``
    and ``isoforms`` is plotted (x = Log2FC, y = name).

    Parameters
    ----------
    matures, isoforms : list
        Records whose item 0 is the full entry name and item 1 the Log2FC.
    uni_names : list
        Names to look for.  A record belongs to a name when the part of
        its entry name before the first ``"_"`` equals that name.
    number : int
        Maximum number of names (with at least one record) to plot.

    Notes
    -----
    Records used to be matched with a substring test (``name in entry``),
    which also picked up entries of any other name containing the searched
    one.  Matching is now exact on the prefix before the first ``"_"``.
    """
    def _group_by_name(records):
        # name prefix -> Log2FC values, in the records' original order.
        grouped = {}
        for rec in records:
            grouped.setdefault(rec[0].split("_")[0], []).append(rec[1])
        return grouped

    sources = (_group_by_name(matures), _group_by_name(isoforms))
    # One (names, log2fc) pair of parallel lists per source.
    picked = tuple(([], []) for _ in sources)

    shown = 0
    for name in uni_names:
        if shown >= number:
            break
        found = False
        for grouped, (names, values) in zip(sources, picked):
            hits = grouped.get(name, ())
            if hits:
                names.extend([name] * len(hits))
                values.extend(hits)
                found = True
        if found:
            shown += 1

    (mat_names, mat_log2fc), (iso_names, iso_log2fc) = picked
    mat_df = pd.DataFrame(dict(names=mat_names, log2fc=mat_log2fc))
    iso_df = pd.DataFrame(dict(names=iso_names, log2fc=iso_log2fc))

    # The original called sort_values() on the frames but discarded the
    # result, so it never had any effect; the rows stay in uni_names order.

    _, ax = plt.subplots()

    # Drawing order is kept (isoforms first, then matures).
    h3 = ax.scatter(iso_df['log2fc'], iso_df['names'], edgecolors='k', linewidth=1, marker='o', c='red')
    h1 = ax.scatter(mat_df['log2fc'], mat_df['names'], edgecolors='k', linewidth=1, marker='o', c='green')

    plt.legend([h1, h3], ["Reference miRNA", "Template isomiRs"],
               bbox_to_anchor=(1.04, 0.5), loc="center left", borderaxespad=0)
    plt.axvline(x=0, color="k")
    plt.grid(axis='y', linewidth=0.2)
    plt.grid(axis='x', linewidth=0.2)
    plt.xlabel("Log2FC", fontsize=12, fontweight='bold')
    plt.yticks(rotation=0, ha="right", fontsize=10)
    plt.xticks(rotation=0, ha="right", fontsize=10)
    plt.tight_layout()
    figure = plt.gcf()
    figure.set_size_inches(16, 12)
    plt.savefig('a2.png', bbox_inches='tight', dpi=300)
| 171 | |
| 172 | |
| 173 ############################################################################################################################################################################################################################################## | |
def preproccess(non_templated, matures, isoforms, log2fc, pval):
    """Filter the three record groups and rank the names they mention.

    Parameters
    ----------
    non_templated, matures, isoforms : list
        Rows of ``[name, log2fc, p]`` where the last two items are
        anything ``float()`` accepts.
    log2fc : float
        A row is kept when ``abs(log2fc of the row) > log2fc``.
    pval : float
        A row is kept when its p value is ``< pval``.

    Returns
    -------
    tuple
        ``(diff_matures, diff_isoforms, diff_non_templated, uni_names,
        non_temp, mat_iso)`` where

        * ``uni_names`` - name prefixes (text before the first ``"_"``) of
          all kept rows, de-duplicated, ordered by descending |log2fc|;
        * ``non_temp`` - kept non-templated rows, descending |log2fc|;
        * ``mat_iso`` - kept mature rows followed by kept isoform rows, in
          input order;
        * ``diff_*`` - rows of each group with |log2fc| > 1, p < ``pval``
          and a prefix in ``uni_names``, descending |log2fc|.

        Every returned row is ``[name, float, float]``.

    Raises
    ------
    SystemExit
        When no row of any group passes the ``log2fc`` / ``pval`` filter.
    """
    # NOTE(review): the diff_* lists use a fixed |log2fc| cutoff of 1, not the
    # `log2fc` argument.  Kept as in the original - confirm this is intended.
    diff_min_abs_fc = 1

    def _abs_fc(row):
        return abs(row[1])

    def _significant(rows, min_abs_fc, allowed_names=None):
        # Conditions are checked in the original order so the p value is
        # only parsed for rows that already passed the fold-change test.
        kept = []
        for row in rows:
            fold = float(row[1])
            if not abs(fold) > min_abs_fc:
                continue
            p_value = float(row[2])
            if not p_value < pval:
                continue
            if allowed_names is not None and row[0].split("_")[0] not in allowed_names:
                continue
            kept.append([row[0], fold, p_value])
        return kept

    non_temp = _significant(non_templated, log2fc)
    mat = _significant(matures, log2fc)
    iso = _significant(isoforms, log2fc)
    mat_iso = mat + iso

    if not (non_temp or mat or iso):
        sys.exit("There aren't entries which meet these criteria")

    non_temp.sort(key=_abs_fc, reverse=True)

    # Stable sort: ties keep the order mature, isoform, non-templated.
    ranked = sorted(mat + iso + non_temp, key=_abs_fc, reverse=True)
    # Order-preserving de-duplication of the name prefixes.
    uni_names = list(dict.fromkeys(row[0].split("_")[0] for row in ranked))
    known_names = set(uni_names)  # O(1) membership for the filters below

    diff_non_templated = _significant(non_templated, diff_min_abs_fc, known_names)
    diff_matures = _significant(matures, diff_min_abs_fc, known_names)
    diff_isoforms = _significant(isoforms, diff_min_abs_fc, known_names)

    diff_matures.sort(key=_abs_fc, reverse=True)
    diff_isoforms.sort(key=_abs_fc, reverse=True)
    diff_non_templated.sort(key=_abs_fc, reverse=True)

    return diff_matures, diff_isoforms, diff_non_templated, uni_names, non_temp, mat_iso
| 202 | |
| 203 ################################################################################################################################################################################################################################################################## | |
def _split_by_kind(rows, fc_col, p_col):
    """Split table rows into (non_templated, matures, isoforms).

    Each kept row is reduced to ``[name, row[fc_col], row[p_col]]``.  Rows
    whose fold-change or p column is ``"NA"`` are dropped.  A name that
    contains ``"__"`` is non-templated; otherwise it is a mature entry when
    its last ``"_"``-separated part contains ``"chr"`` and an isoform when
    it does not.
    """
    non_templated, matures, isoforms = [], [], []
    for row in rows:
        # Blank or truncated lines cannot be classified; skip them instead of
        # failing with IndexError (p_col is always the larger index).
        if len(row) <= p_col:
            continue
        if row[fc_col] == "NA" or row[p_col] == "NA":
            continue
        record = [row[0], row[fc_col], row[p_col]]
        if "__" in row[0]:
            non_templated.append(record)
        elif 'chr' in row[0].split("_")[-1]:
            matures.append(record)
        else:
            isoforms.append(record)
    return non_templated, matures, isoforms


def main():
    """Parse the command line, draw the plots in child processes, build the PDF."""
    starttime = time.time()

    parser = argparse.ArgumentParser()
    parser.add_argument("-in", "--input", required=True, action="store",
                        help="tab-separated differential expression table")
    parser.add_argument("-p_value", "--pval", required=True, action="store",
                        help="keep entries whose p value is below this number")
    parser.add_argument("-fc", "--log2fc", required=True, action="store",
                        help="keep entries whose absolute log2 fold change is above this number")
    parser.add_argument("-top", "--top_mirnas", required=True, action="store",
                        help="number of top miRNAs to plot")
    parser.add_argument("-tool_dir", "--tool_directory", action="store",
                        help="tool directory path")
    parser.add_argument("-statistic", "--stat", action="store",
                        help="1 = filter on the first p-value column, "
                             "any other value = the column right after it")
    parser.add_argument("-diff_tool", "--tool", required=True, action="store",
                        help="table format: 1 = DESeq, 2 = EdgeR")

    args = parser.parse_args()

    lock = Lock()
    number = int(args.top_mirnas)
    log2fc = float(args.log2fc)
    pval = float(args.pval)

    # Column layout per tool: index of the fold-change column and of the
    # p-value column chosen by --stat.
    if args.tool == "2":
        fc_col = 1
        p_col = 4 if args.stat == "1" else 5
        header_rows = 1  # the EdgeR table starts with a header line
    elif args.tool == "1":
        fc_col = 2
        p_col = 5 if args.stat == "1" else 6
        # NOTE(review): no header line is dropped for DESeq input, as in the
        # original - presumably that table has none; confirm.
        header_rows = 0
    else:
        # Previously this fell through and failed later with a NameError.
        parser.error("--tool must be 1 (DESeq) or 2 (EdgeR)")

    # `read` comes from the project's star imports.
    raw_lines = read(args.input, 0)
    table = [line.rstrip("\n").split("\t") for line in raw_lines][header_rows:]

    non_templated, matures, isoforms = _split_by_kind(table, fc_col, p_col)

    diff_matures, diff_isoforms, diff_non_templated, names, non_temp, mat_iso = preproccess(
        non_templated, matures, isoforms, log2fc, pval)

    if non_templated:
        analysis = "2"
        jobs = [
            Process(target=top_diff, args=(non_temp, number, "nt", lock)),
            Process(target=top_diff, args=(mat_iso, number, "t", lock)),
            Process(target=top_scatter_non,
                    args=(diff_matures, diff_isoforms, diff_non_templated, names, number)),
        ]
    else:
        analysis = "1"
        jobs = [
            # The lock argument was missing here, so the child process died
            # with a TypeError before drawing anything.
            Process(target=top_diff, args=(mat_iso, number, "t", lock)),
            Process(target=top_scatter_tem,
                    args=(diff_matures, diff_isoforms, names, number)),
        ]

    for job in jobs:
        job.start()
    for job in jobs:
        job.join()

    # The original (string) value of --top_mirnas is passed on unchanged.
    pdf_after_DE(analysis, args.top_mirnas)

    print('That took {} seconds'.format(time.time() - starttime))


# Guard required by multiprocessing: with the "spawn" start method each child
# re-imports this module and must not re-run the script body.
if __name__ == "__main__":
    main()
| 271 |
