Mercurial > repos > glogobyte > viztool
changeset 15:6db3bd727fde draft
Uploaded
author | glogobyte |
---|---|
date | Wed, 28 Oct 2020 07:34:56 +0000 |
parents | e51ebc767701 |
children | 34ffe50a8da6 |
files | viz_ultra.py |
diffstat | 1 files changed, 270 insertions(+), 0 deletions(-) [+] |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/viz_ultra.py Wed Oct 28 07:34:56 2020 +0000 @@ -0,0 +1,270 @@ +import argparse +from viz_graphs import * +import sys +import pandas as pd +import matplotlib.pyplot as plt +import matplotlib.patches as mpatches +import matplotlib.font_manager as font_manager +import time +from multiprocessing import Process, Queue, Lock, Pool, Manager, Value + + +################################################################################################################################################################################################################## + +def top_diff(miRNA_info, number,flag,l): + + Kind=[] + + miRNA_info.sort(key = lambda x: abs(x[1]),reverse=True) + miRNA_info = miRNA_info[:number] + miRNA_info.sort(key = lambda x: x[0]) + + for x in miRNA_info: + if x[1] > 0: + Kind.append(True) + elif x[1] < 0: + Kind.append(False) + else: + Kind.append("Zero") + + top_miRNA = {"Names": [x[0] for x in miRNA_info], + "Log2FC": [x[1] for x in miRNA_info], + "Kind": Kind}; + + df_miRNA = pd.DataFrame(data=top_miRNA) + df_miRNA = df_miRNA.sort_values(by=['Names']) + if df_miRNA.empty==False: + h1=df_miRNA.plot.barh(x= 'Names',y='Log2FC',color=df_miRNA.Kind.map({True: 'g', False: 'r', 'Zero':'k'})) + figure = plt.gcf() # get current figure + figure.set_size_inches(5, 12) # set figure's size manually to your full screen (32x18) + up_reg = mpatches.Patch(color='green', label='Upregulated') + down_reg = mpatches.Patch(color='red', label='Downregulated') + font = font_manager.FontProperties(weight='bold', style='normal') + l3 = plt.legend(handles=[up_reg,down_reg],bbox_to_anchor=(1.04,0.5), loc="center left", borderaxespad=0) + h1.set_ylabel(" ", fontsize=3, fontweight='bold') + h1.set_xlabel("Log2FC", fontsize=12, fontweight='bold') + plt.axvline(x=0, color="k") + + plt.grid(axis='y', linewidth=0.2) + plt.grid(axis='x', linewidth=0.2) + if flag=='t': + plt.savefig('tem.png', bbox_inches='tight', dpi=300) + if flag=='nt': + plt.savefig('non.png', bbox_inches='tight', dpi=300) + +#################################################################################################################################################################################################################### + +def unique(sequence): + seen = set() + return [x for x in sequence if not (x in seen or seen.add(x))] + +########################################################################################################################################################################################################################################################################### + +def top_scatter_non(matures,isoforms,non_temp,uni_names,number): + + mat_names=[] + mat_log2fc=[] + + iso_names=[] + iso_log2fc=[] + + non_temp_names=[] + non_temp_log2fc=[] + + count=0 + for x in uni_names: + flag = False + if count<number: + for y in matures: + if x in y[0]: + mat_log2fc.append(y[1]) + mat_names.append(x) + flag=True + for y in isoforms: + if x in y[0]: + iso_log2fc.append(y[1]) + iso_names.append(x) + flag=True + for y in non_temp: + if x in y[0]: + non_temp_log2fc.append(y[1]) + non_temp_names.append(x) + flag=True + if flag==True: + count+=1 + + mat_df = pd.DataFrame(dict(names=mat_names, log2fc=mat_log2fc)) + iso_df = pd.DataFrame(dict(names=iso_names, log2fc=iso_log2fc)) + non_df = pd.DataFrame(dict(names=non_temp_names, log2fc= non_temp_log2fc)) + + iso_df.sort_values(by=['names']) + mat_df.sort_values(by=['names']) + non_df.sort_values(by=['names']) + + fig, ax = plt.subplots() + + h3=ax.scatter(iso_df['log2fc'],iso_df['names'],edgecolors='k',linewidth=1, marker='o', c='red') + h1=ax.scatter(mat_df['log2fc'],mat_df['names'],edgecolors='k',linewidth=1, marker='o', c='green') + h2=ax.scatter(non_df['log2fc'],non_df['names'],edgecolors='k',linewidth=1, marker='o', c='blue') + + l3 = plt.legend([h1,h2,h3],["Reference miRNA","Non-template","Template isomiRs"],bbox_to_anchor=(1.04,0.5), loc="center left", borderaxespad=0) + plt.axvline(x=0, color="k") + plt.grid(axis='y', linewidth=0.2) + plt.grid(axis='x', linewidth=0.2) + plt.xlabel("Log2FC", fontsize=12, fontweight='bold') + plt.yticks(rotation=0,ha="right", fontsize=10) + plt.xticks(rotation=0,ha="right", fontsize=10) + plt.tight_layout() + figure = plt.gcf() # get current figure + figure.set_size_inches(16, 12) # set figure's size manually to your full screen (32x18) + plt.savefig('a2.png', bbox_inches='tight', dpi=300) + +######################################################################################################################################################################################################################################### +def top_scatter_tem(matures,isoforms,uni_names,number): + + mat_names=[] + mat_log2fc=[] + + iso_names=[] + iso_log2fc=[] + + count=0 + for x in uni_names: + flag = False + if count<number: + for y in matures: + if x in y[0]: + mat_log2fc.append(y[1]) + mat_names.append(x) + flag=True + for y in isoforms: + if x in y[0]: + iso_log2fc.append(y[1]) + iso_names.append(x) + flag=True + if flag==True: + count+=1 + + mat_df = pd.DataFrame(dict(names=mat_names, log2fc=mat_log2fc)) + iso_df = pd.DataFrame(dict(names=iso_names, log2fc=iso_log2fc)) + + iso_df.sort_values(by=['names']) + mat_df.sort_values(by=['names']) + + fig, ax = plt.subplots() + + h3=ax.scatter(iso_df['log2fc'],iso_df['names'],edgecolors='k',linewidth=1, marker='o', c='red') + h1=ax.scatter(mat_df['log2fc'],mat_df['names'],edgecolors='k',linewidth=1, marker='o', c='green') + + l3 = plt.legend([h1,h3],["Reference miRNA","Template isomiRs"],bbox_to_anchor=(1.04,0.5), loc="center left", borderaxespad=0) + plt.axvline(x=0, color="k") + plt.grid(axis='y', linewidth=0.2) + plt.grid(axis='x', linewidth=0.2) + plt.xlabel("Log2FC", fontsize=12, fontweight='bold') + plt.yticks(rotation=0,ha="right", fontsize=10) + plt.xticks(rotation=0,ha="right", fontsize=10) + plt.tight_layout() + figure = plt.gcf() # get current figure + figure.set_size_inches(16, 12) # set figure's size manually to your full screen (32x18) + plt.savefig('a2.png', bbox_inches='tight', dpi=300) + + +############################################################################################################################################################################################################################################## +def preproccess(non_templated,matures,isoforms,log2fc,pval): + + non_temp = [[x[0],float(x[1]),float(x[2])] for x in non_templated if abs(float(x[1]))>log2fc and float(x[2])<pval] + mat = [[x[0],float(x[1]),float(x[2])] for x in matures if abs(float(x[1]))>log2fc and float(x[2])<pval] + iso = [[x[0],float(x[1]),float(x[2])] for x in isoforms if abs(float(x[1]))>log2fc and float(x[2])<pval] + mat_iso = mat+iso + + if not non_temp and not mat and not iso: + sys.exit("There aren't entries which meet these criteria") + + mat.sort(key = lambda x: abs(float(x[1])),reverse=True) + iso.sort(key = lambda x: abs(float(x[1])),reverse=True) + non_temp.sort(key = lambda x: abs(float(x[1])),reverse=True) + + all=mat+iso+non_temp + all.sort(key = lambda x: abs(float(x[1])), reverse=True) + names=[x[0].split("_")[0] for x in all] + uni_names=unique(names) + + diff_non_templated = [[x[0],float(x[1]),float(x[2])] for x in non_templated if abs(float(x[1]))>1 and float(x[2])<pval and x[0].split("_")[0] in uni_names] + diff_matures = [[x[0],float(x[1]),float(x[2])] for x in matures if abs(float(x[1]))>1 and float(x[2])<pval and x[0].split("_")[0] in uni_names] + diff_isoforms = [[x[0],float(x[1]),float(x[2])] for x in isoforms if abs(float(x[1]))>1 and float(x[2])<pval and x[0].split("_")[0] in uni_names] + + diff_matures.sort(key = lambda x: abs(float(x[1])),reverse=True) + diff_isoforms.sort(key = lambda x: abs(float(x[1])),reverse=True) + diff_non_templated.sort(key = lambda x: abs(float(x[1])),reverse=True) + + return diff_matures,diff_isoforms,diff_non_templated,uni_names,non_temp,mat_iso + +################################################################################################################################################################################################################################################################## +starttime = time.time() + +parser = argparse.ArgumentParser() +parser.add_argument("-in", "--input", help="choose type of analysis", action="store") +parser.add_argument("-p_value", "--pval", help="choose type of analysis", action="store") +parser.add_argument("-fc", "--log2fc", help="choose type of analysis", action="store") +parser.add_argument("-top", "--top_mirnas", help="choose type of analysis", action="store") +parser.add_argument("-tool_dir", "--tool_directory", help="tool directory path", action="store") +parser.add_argument("-statistic", "--stat", help="tool directory path", action="store") +parser.add_argument("-diff_tool", "--tool", help="tool directory path", action="store") + +args = parser.parse_args() + +l=Lock() +number = int(args.top_mirnas) +log2fc = float(args.log2fc) +pval = float(args.pval) + +if args.tool=="2": + + raw_EdgeR = read(args.input,0) + EdgeR = [x.rstrip("\n").split("\t") for x in raw_EdgeR] + del EdgeR[0] + + if args.stat=="1": + non_templated = [[x[0],x[1],x[4]] for x in EdgeR if "__" in x[0] and x[1]!="NA" and x[4]!="NA"] + matures = [[x[0],x[1],x[4]] for x in EdgeR if 'chr' in x[0].split("_")[-1] and "__" not in x[0] and x[1]!="NA" and x[4]!="NA"] + isoforms = [[x[0],x[1],x[4]] for x in EdgeR if 'chr' not in x[0].split("_")[-1] and "__" not in x[0] and x[1]!="NA" and x[4]!="NA"] + else: + non_templated = [[x[0],x[1],x[5]] for x in EdgeR if "__" in x[0] and x[1]!="NA" and x[5]!="NA"] + matures = [[x[0],x[1],x[5]] for x in EdgeR if 'chr' in x[0].split("_")[-1] and "__" not in x[0] and x[1]!="NA" and x[5]!="NA"] + isoforms = [[x[0],x[1],x[5]] for x in EdgeR if 'chr' not in x[0].split("_")[-1] and "__" not in x[0] and x[1]!="NA" and x[5]!="NA"] + +if args.tool=="1": + + raw_Deseq = read(args.input,0) + Deseq = [x.rstrip("\n").split("\t") for x in raw_Deseq] + + if args.stat=="1": + non_templated = [[x[0],x[2],x[5]] for x in Deseq if "__" in x[0] and x[2]!="NA" and x[5]!="NA"] + matures = [[x[0],x[2],x[5]] for x in Deseq if 'chr' in x[0].split("_")[-1] and "__" not in x[0] and x[2]!="NA" and x[5]!="NA"] + isoforms = [[x[0],x[2],x[5]] for x in Deseq if 'chr' not in x[0].split("_")[-1] and "__" not in x[0] and x[2]!="NA" and x[5]!="NA"] + else: + non_templated = [[x[0],x[2],x[6]] for x in Deseq if "__" in x[0] and x[2]!="NA" and x[6]!="NA"] + matures = [[x[0],x[2],x[6]] for x in Deseq if 'chr' in x[0].split("_")[-1] and "__" not in x[0] and x[2]!="NA" and x[6]!="NA"] + isoforms = [[x[0],x[2],x[6]] for x in Deseq if 'chr' not in x[0].split("_")[-1] and "__" not in x[0] and x[2]!="NA" and x[6]!="NA"] + + +diff_matures,diff_isoforms,diff_non_templated,names,non_temp,mat_iso = preproccess(non_templated,matures,isoforms,log2fc,pval) + +if non_templated!=[]: + analysis="2" + p=[Process(target=top_diff,args=(non_temp,number,"nt",l))] + p.extend([Process(target=top_diff,args=(mat_iso,number,"t",l))]) + p.extend([Process(target=top_scatter_non,args=(diff_matures,diff_isoforms,diff_non_templated,names,number))]) + +else: + analysis="1" + p=[Process(target=top_diff,args=(mat_iso,number,"t"))] + p.extend([Process(target=top_scatter_tem,args=(diff_matures,diff_isoforms,names,number))]) + +[x.start() for x in p] +[x.join() for x in p] + +pdf_after_DE(analysis,args.top_mirnas) + +print('That took {} seconds'.format(time.time() - starttime)) +