Mercurial > repos > glogobyte > viztool
changeset 18:d77dace80d5a draft
Deleted selected files
author | glogobyte |
---|---|
date | Wed, 28 Oct 2020 07:49:34 +0000 |
parents | a09d238416ba |
children | b250f09f4eb4 |
files | viz.xml viz_graphs.py viz_ultra.py |
diffstat | 3 files changed, 0 insertions(+), 534 deletions(-) [+] |
line wrap: on
line diff
<!-- Galaxy tool wrapper for viz_ultra.py (viz.xml). -->
<tool id="viz_tool" name="Viz_tool: After Deseq2" version="0.1.0">
    <description>for each sequence in a file</description>
    <requirements>
        <requirement type="package" version="1.7">fpdf</requirement>
        <requirement type="package" version="3.7.4">python</requirement>
        <requirement type="package" version="1.17.3">numpy</requirement>
        <requirement type="package" version="3.1.2">matplotlib</requirement>
        <requirement type="package" version="1.0.3">pandas</requirement>
    </requirements>
    <!-- The threshold passed as -p_value depends on which statistic the user selected. -->
    <command><![CDATA[
        #if $stats.choice == "1":
            python '$__tool_directory__/viz_ultra.py' -in '$input_file' -p_value "$stats.pvalue" -fc $log2fc -top $top_mirna -tool_dir '$__tool_directory__' -statistic "$stats.choice" -diff_tool "$tool"
        #else:
            python '$__tool_directory__/viz_ultra.py' -in '$input_file' -p_value "$stats.padj" -fc $log2fc -top $top_mirna -tool_dir '$__tool_directory__' -statistic "$stats.choice" -diff_tool "$tool"
        #end if
    ]]></command>
    <inputs>
        <param name="tool" type="select" label="File comes from" help="Choose the tool which generates the input file.">
            <option value="1" selected="true">Deseq2</option>
            <option value="2">EdgeR</option>
        </param>
        <param name="input_file" type="data" format="tabular" label="Input file" help="File from Deseq2 or EdgeR"/>
        <param name="top_mirna" type="select" label="Choose the top differentially miRNAs of the analysis" help="Choose the number of top differentially expressed miRNAs.">
            <option value="10" selected="true">Top 10</option>
            <option value="20">Top 20</option>
            <option value="30">Top 30</option>
            <option value="40">Top 40</option>
            <option value="50">Top 50</option>
        </param>
        <conditional name="stats">
            <param name="choice" type="select" label="Choose p-value or p-adj" help="Statistic used to filter the entries: the raw p-value or the adjusted p-value.">
                <option value="1" selected="true">Pvalue</option>
                <option value="2">Padj</option>
            </param>
            <when value="1">
                <param name="pvalue" type="float" min="0" max="1" value="0.05" label="P-value (max value)" help="p-value threshold" />
            </when>
            <when value="2">
                <param name="padj" type="float" min="0" max="1" value="0.05" label="P-adjustment (max value)" help="p-adjustment threshold" />
            </when>
        </conditional>
        <param name="log2fc" type="float" min="0" max="10" value="1" label="Log2FC (Absolute value)" help="Log2FC threshold" />
    </inputs>
    <outputs>
        <!--data name="TOP_temp" format="png" label="TOP_$top_mirna Differentially Expressed miRNAs" from_work_dir="$__tool_directory__/tem.png" /-->
        <!--data name="TOP_non" format="png" label="TOP_$top_mirna Differentially Expressed miRNAs" from_work_dir="$__tool_directory__/non.png" /-->
        <!-- from_work_dir is relative to the job working directory, which is where
             viz_ultra.py saves a2.png and viz_graphs.py saves report2.pdf. -->
        <data name="Scatter" format="png" label="Scatter Differentially Expressed miRNAs" from_work_dir="a2.png" />
        <data name="File" format="pdf" label="PDF" from_work_dir="report2.pdf" />
        <!-- NOTE(review): neither viz_ultra.py nor viz_graphs.py writes demo.txt;
             confirm whether this output is still needed. -->
        <data name="File1" format="txt" label="txt" from_work_dir="demo.txt" />
    </outputs>
    <help><![CDATA[
Filters a Deseq2 or EdgeR result table by Log2FC and p-value (or adjusted p-value),
plots the top differentially expressed miRNAs and isomiRs, and collects the plots
in a PDF report.
    ]]></help>
</tool>
"""Helpers of the viz tool (viz_graphs.py).

Reads text files and assembles PDF reports from PNG plots that are expected
in the current working directory.
"""
import os
import sys


def read(path, flag):
    """Read a text file and return its lines as a list.

    Args:
        path: Path of the file to read.
        flag: 0 keeps the line endings (``readlines``); 1 strips them
            (``read().splitlines()``).

    Returns:
        The list of lines.

    Raises:
        ValueError: If *flag* is neither 0 nor 1 (previously the function
            silently returned ``None``).
    """
    if flag not in (0, 1):
        raise ValueError("flag must be 0 or 1, got %r" % (flag,))

    # The context manager closes the file; no explicit close() is needed.
    with open(path) as handle:
        if flag == 0:
            return handle.readlines()
        return handle.read().splitlines()


def _require_images(names):
    """Return *names* as a list after checking that every file exists."""
    missing = [name for name in names if not os.path.isfile(name)]
    if missing:
        raise FileNotFoundError(
            "Missing image(s) required for the report: " + ", ".join(missing)
        )
    return list(names)


#################################################################################################
def pdf_before_DE(analysis):
    """Write ``report1.pdf`` (the isomiR profile report) from the PNG plots.

    Args:
        analysis: "2" adds the non-template plots (a different pie chart and
            an extra landscape page); any other value builds the
            template-only report.

    Raises:
        FileNotFoundError: If one of the required PNG files is not present
            in the current directory. The images are placed by position, so
            a missing file can not be skipped.
    """
    # Imported lazily so the module can be imported without fpdf installed.
    from fpdf import FPDF

    if analysis == "2":
        image_names = ("c_hist_red.png", "t_hist_red.png", "pie_non.png",
                       "spider_red.png", "spider_non_red.png",
                       "c_logo.png", "t_logo.png", "c_bar.png", "t_bar.png")
    else:
        image_names = ("c_hist_red.png", "t_hist_red.png", "pie_tem.png",
                       "spider_red.png", "spider_non_red.png")
    images = _require_images(image_names)

    # Portrait A4 document measured in inches.
    pdf = FPDF('P', 'in', 'A4')
    # A page must exist before anything can be drawn.
    pdf.add_page()

    # Page header: Arial bold, 20 pt.
    pdf.set_font('Arial', 'B', 20.0)
    pdf.cell(pdf.w-0.5, 0.5, 'IsomiR Profile Report', align='C')
    pdf.ln(0.7)
    pdf.set_font('Arial', '', 16.0)
    pdf.cell(pdf.w-0.5, 0.5, 'sRNA Length Distribution', align='C')

    # Smaller font for image captions.
    pdf.set_font('Arial', '', 11.0)
    pdf.ln(0.5)

    # Two histograms side by side: remember the row's y so the second image
    # is drawn at the same height as the first.
    row_y = pdf.get_y()
    pdf.image(images[0], x=0.3, w=4, h=3)
    pdf.image(images[1], x=4, y=row_y, w=4, h=3)
    pdf.ln(0.3)

    pdf.cell(0.2)
    pdf.cell(3.0, 0.0, " Mapped and unmapped reads to custom precussor arm reference DB (5p and 3p arms) in Control (left)")
    pdf.ln(0.2)
    pdf.cell(0.2)
    pdf.cell(3.0, 0.0, " and Treated (right) groups")

    pdf.ln(0.5)
    pie_top = pdf.get_y()
    pdf.image(images[2], x=1, w=6.5, h=5)
    # Go back up to print the section title over the top of the pie chart.
    pdf.set_y(pie_top+0.2)
    pdf.set_font('Arial', '', 14.0)
    pdf.cell(pdf.w-0.5, 0.5, 'Template and non-template IsomiRs', align='C')
    pdf.set_font('Arial', '', 11.0)
    # The caption is placed at a fixed height below the pie chart.
    pdf.set_y(9.5)
    pdf.cell(0.2)
    if analysis == "2":
        pdf.cell(3.0, 0.0, " Template, non-template, miRNA reference and unmapped sequences as percentage of total sRNA")
    else:
        pdf.cell(3.0, 0.0, " Template, miRNA reference and unmapped sequences as percentage of total sRNA")
    pdf.ln(0.2)
    pdf.cell(0.2)
    pdf.cell(3.0, 0.0, " reads in Control (left) and treated (right) groups")

    # Second page: the two spider plots.
    pdf.add_page()
    pdf.set_font('Arial', 'B', 16.0)
    pdf.cell(pdf.w-0.5, 0.5, "Reference form and isomiR among total miRNA reads", align='C')
    pdf.ln(0.7)
    pdf.set_font('Arial', 'B', 12.0)
    pdf.cell(pdf.w-0.5, 0.5, "Template isomiR profile (redundant)", align='C')
    pdf.ln(0.5)
    pdf.image(images[3], x=1.5, w=5.5, h=4)
    pdf.ln(0.6)
    pdf.cell(pdf.w-0.5, 0.0, "Template isomiR profile (non-redundant)", align='C')
    pdf.set_font('Arial', '', 12.0)
    pdf.ln(0.2)
    pdf.image(images[4], x=1.5, w=5.5, h=4)
    pdf.ln(0.3)
    pdf.set_font('Arial', '', 11.0)
    pdf.cell(0.2)
    pdf.cell(3.0, 0.0, " * IsomiRs potentialy initiated from multiple loci")

    if analysis == "2":
        # Landscape page with a 2x2 grid: logos on the left, bars on the right.
        pdf.add_page('L')

        pdf.set_font('Arial', 'B', 16.0)
        pdf.cell(pdf.w-0.5, 0.5, "Non-template IsomiRs", align='C')
        pdf.ln(0.5)
        pdf.set_font('Arial', 'B', 12.0)
        pdf.cell(pdf.w-0.5, 0.5, "3' Additions of reference of isomiR sequence", align='C')
        pdf.ln(0.7)

        row_y = pdf.get_y()
        pdf.image(images[5], x=1.5, w=3.65, h=2.65)
        pdf.image(images[7], x=6.5, y=row_y, w=3.65, h=2.65)
        pdf.ln(0.5)
        row_y = pdf.get_y()
        pdf.image(images[6], x=1.5, w=3.65, h=2.65)
        pdf.image(images[8], x=6.5, y=row_y, w=3.65, h=2.65)

    # output() terminates the document itself, so no explicit close() call.
    pdf.output('report1.pdf', 'F')


#############################################################################################################################################################3

def pdf_after_DE(analysis, top):
    """Write ``report2.pdf`` with the differential-expression plots.

    Each plot (``tem.png``, ``non.png``, ``a2.png``) is optional: a plot
    that is not present in the current directory is left out and a warning
    is printed instead.

    Args:
        analysis: "2" when non-template isomiRs are part of the analysis, in
            which case ``non.png`` is also looked for.
        top: Number of top entries, used in the headings (string or int).
    """
    # Imported lazily so the module can be imported without fpdf installed.
    from fpdf import FPDF

    top = str(top)
    if analysis == "2":
        image_names = ("tem.png", "a2.png", "non.png")
    else:
        image_names = ("tem.png", "a2.png")
    # Keep only the plots that were actually produced.
    images = [name for name in image_names if os.path.isfile(name)]

    # Portrait letter document measured in inches.
    pdf = FPDF('P', 'in', 'letter')
    # A page must exist before anything can be drawn.
    pdf.add_page()

    # Page header: Arial bold, 16 pt.
    pdf.set_font('Arial', 'B', 16.0)
    pdf.cell(pdf.w-0.5, 0.5, 'Differential expression of miRNAs and Isoforms', align='C')

    pdf.ln(0.7)
    pdf.set_font('Arial', 'B', 12.0)
    if "tem.png" in images:
        pdf.cell(pdf.w-0.5, 0.5, 'Top '+top+' most differentially expressed miRNA and template isoforms', align='C')
        pdf.set_font('Arial', '', 10.0)
        pdf.ln(0.4)
        pdf.image("tem.png", x=0.8, w=7, h=8)
        pdf.ln(0.3)
        pdf.set_font('Arial', 'B', 12.0)
    else:
        print("WARNING: There aren't miRNAs which fullfiled these criteria")

    if analysis == "2":
        # The non-template plot is only expected in this kind of analysis,
        # so the warning is not printed for a template-only run.
        if "non.png" in images:
            if "tem.png" in images:
                pdf.add_page()
            pdf.ln(0.7)
            pdf.cell(pdf.w-0.5, 0.5, 'Top '+top+' most differentially expressed non-template isomiRs', align='C')
            pdf.ln(0.4)
            pdf.image("non.png", x=0.5, w=7.5, h=6.5)
        else:
            print("WARNING: There aren't non-template miRNAs which fullfiled these criteria")

    if "a2.png" in images:
        # Start a new page only when another plot was already drawn.
        if len(images) >= 2:
            pdf.add_page()
        pdf.ln(0.5)
        pdf.cell(pdf.w-0.5, 0.5, 'Top '+top+' most differentially expressed miRNAs and isomiRs grouped by arm', align='C')
        pdf.ln(0.4)
        pdf.image("a2.png", x=0.8, w=7, h=8)
        pdf.ln(0.3)
    else:
        print("WARNING: The scatter plot of miRNAs and isomiRs grouped by arm (a2.png) was not generated")

    pdf.output('report2.pdf', 'F')
"""Command-line entry point of the viz tool (viz_ultra.py).

Filters a Deseq2 or EdgeR result table, plots the top differentially
expressed entries (bar plots and a scatter plot) in parallel processes and
finally builds the PDF report.
"""
import argparse
import sys
import time
from collections import defaultdict
from multiprocessing import Process, Queue, Lock, Pool, Manager, Value

import matplotlib.pyplot as plt
import matplotlib.patches as mpatches
import matplotlib.font_manager as font_manager
import pandas as pd

from viz_graphs import pdf_after_DE, read

# Output file of top_diff for each supported flag.
_TOP_DIFF_FILES = {'t': 'tem.png', 'nt': 'non.png'}

# Column layout of the supported input tables, keyed by the -diff_tool value:
# (log2fc column, p-value column, adjusted p-value column, drop first row).
_TABLE_LAYOUT = {
    "1": (2, 5, 6, False),  # Deseq2
    "2": (1, 4, 5, True),   # EdgeR
}


def _base_name(entry_name):
    """Return the part of an entry name before the first underscore."""
    return entry_name.split("_")[0]


##################################################################################################################################################################################################################

def top_diff(miRNA_info, number, flag, l=None):
    """Save a horizontal bar plot of the entries with the largest |Log2FC|.

    Args:
        miRNA_info: List of ``[name, log2fc, p]`` entries; it is not modified.
        number: Maximum number of entries to plot.
        flag: 't' saves the plot as ``tem.png``, 'nt' as ``non.png``; with
            any other value nothing is saved.
        l: Lock handed over by the caller. It is not used here and is
            optional, so callers may leave it out.

    Nothing is plotted or saved when *miRNA_info* is empty.
    """
    # Work on a copy so the caller's list keeps its order.
    ranked = sorted(miRNA_info, key=lambda entry: abs(entry[1]), reverse=True)[:number]
    ranked.sort(key=lambda entry: entry[0])

    kinds = []
    for entry in ranked:
        if entry[1] > 0:
            kinds.append(True)
        elif entry[1] < 0:
            kinds.append(False)
        else:
            kinds.append("Zero")

    df_miRNA = pd.DataFrame(data={"Names": [entry[0] for entry in ranked],
                                  "Log2FC": [entry[1] for entry in ranked],
                                  "Kind": kinds})
    df_miRNA = df_miRNA.sort_values(by=['Names'])
    if df_miRNA.empty:
        return

    # Green bars for positive, red for negative, black for zero fold change.
    bar_colors = df_miRNA.Kind.map({True: 'g', False: 'r', 'Zero': 'k'})
    axes = df_miRNA.plot.barh(x='Names', y='Log2FC', color=bar_colors)
    plt.gcf().set_size_inches(5, 12)
    up_reg = mpatches.Patch(color='green', label='Upregulated')
    down_reg = mpatches.Patch(color='red', label='Downregulated')
    plt.legend(handles=[up_reg, down_reg], bbox_to_anchor=(1.04, 0.5), loc="center left", borderaxespad=0)
    axes.set_ylabel(" ", fontsize=3, fontweight='bold')
    axes.set_xlabel("Log2FC", fontsize=12, fontweight='bold')
    plt.axvline(x=0, color="k")

    plt.grid(axis='y', linewidth=0.2)
    plt.grid(axis='x', linewidth=0.2)

    target = _TOP_DIFF_FILES.get(flag)
    if target is not None:
        plt.savefig(target, bbox_inches='tight', dpi=300)


####################################################################################################################################################################################################################

def unique(sequence):
    """Return the items of *sequence* without duplicates, in first-seen order."""
    return list(dict.fromkeys(sequence))


def _top_frames(groups, uni_names, number):
    """Build one names/log2fc DataFrame per group for the first names that match.

    *uni_names* is walked in order; a name counts towards *number* as soon
    as at least one group has an entry whose base name equals it. Entries
    are matched on the exact base name (as ``preproccess`` does), so a name
    never picks up the entries of a longer name that merely contains it.
    """
    grouped = []
    for entries in groups:
        by_name = defaultdict(list)
        for entry in entries:
            by_name[_base_name(entry[0])].append(entry[1])
        grouped.append(by_name)

    names = [[] for _ in groups]
    values = [[] for _ in groups]
    count = 0
    for name in uni_names:
        if count >= number:
            break
        found = False
        for idx, by_name in enumerate(grouped):
            hits = by_name.get(name)
            if hits:
                names[idx].extend([name] * len(hits))
                values[idx].extend(hits)
                found = True
        if found:
            count += 1

    # sort_values returns a new frame, so the result has to be kept.
    return [pd.DataFrame(dict(names=n, log2fc=v)).sort_values(by=['names'])
            for n, v in zip(names, values)]


def _save_scatter(layers, legend_order):
    """Draw the ``(frame, colour, label)`` layers in order and save ``a2.png``.

    *legend_order* lists the layer indices in the order they appear in the
    legend.
    """
    fig, ax = plt.subplots()
    handles = [ax.scatter(frame['log2fc'], frame['names'], edgecolors='k', linewidth=1, marker='o', c=colour)
               for frame, colour, _ in layers]

    plt.legend([handles[i] for i in legend_order], [layers[i][2] for i in legend_order],
               bbox_to_anchor=(1.04, 0.5), loc="center left", borderaxespad=0)
    plt.axvline(x=0, color="k")
    plt.grid(axis='y', linewidth=0.2)
    plt.grid(axis='x', linewidth=0.2)
    plt.xlabel("Log2FC", fontsize=12, fontweight='bold')
    plt.yticks(rotation=0, ha="right", fontsize=10)
    plt.xticks(rotation=0, ha="right", fontsize=10)
    plt.tight_layout()
    fig.set_size_inches(16, 12)
    plt.savefig('a2.png', bbox_inches='tight', dpi=300)


###########################################################################################################################################################################################################################################################################

def top_scatter_non(matures, isoforms, non_temp, uni_names, number):
    """Save the scatter plot (``a2.png``) including non-template isomiRs.

    Args:
        matures, isoforms, non_temp: Lists of ``[name, log2fc, p]`` entries.
        uni_names: Base names in the order they should be considered.
        number: Maximum number of base names to plot.
    """
    mat_df, iso_df, non_df = _top_frames((matures, isoforms, non_temp), uni_names, number)
    _save_scatter(
        [(iso_df, 'red', "Template isomiRs"),
         (mat_df, 'green', "Reference miRNA"),
         (non_df, 'blue', "Non-template")],
        legend_order=(1, 2, 0),
    )


#########################################################################################################################################################################################################################################
def top_scatter_tem(matures, isoforms, uni_names, number):
    """Save the scatter plot (``a2.png``) of reference miRNAs and template isomiRs.

    Args:
        matures, isoforms: Lists of ``[name, log2fc, p]`` entries.
        uni_names: Base names in the order they should be considered.
        number: Maximum number of base names to plot.
    """
    mat_df, iso_df = _top_frames((matures, isoforms), uni_names, number)
    _save_scatter(
        [(iso_df, 'red', "Template isomiRs"),
         (mat_df, 'green', "Reference miRNA")],
        legend_order=(1, 0),
    )


def _abs_log2fc(entry):
    """Sort key: absolute fold change of an entry."""
    return abs(float(entry[1]))


def _significant(entries, min_abs_log2fc, pval, allowed_names=None):
    """Return the entries passing both thresholds, with numeric values.

    When *allowed_names* is given, the entry's base name must be in it too.
    """
    return [[x[0], float(x[1]), float(x[2])] for x in entries
            if abs(float(x[1])) > min_abs_log2fc and float(x[2]) < pval
            and (allowed_names is None or _base_name(x[0]) in allowed_names)]


##############################################################################################################################################################################################################################################
def preproccess(non_templated, matures, isoforms, log2fc, pval):
    """Filter the three entry groups by fold change and p-value.

    Args:
        non_templated, matures, isoforms: Lists of ``[name, log2fc, p]``
            entries whose values may still be strings.
        log2fc: Minimum absolute Log2FC (exclusive).
        pval: Maximum p-value (exclusive).

    Returns:
        ``(diff_matures, diff_isoforms, diff_non_templated, uni_names,
        non_temp, mat_iso)``: the three ``diff_*`` lists and ``non_temp``
        are sorted by descending |Log2FC|, ``uni_names`` holds the unique
        base names in that same order, and ``mat_iso`` is the filtered
        matures followed by the filtered isoforms in input order.

    Exits the program when no entry at all passes the thresholds.
    """
    non_temp = _significant(non_templated, log2fc, pval)
    mat = _significant(matures, log2fc, pval)
    iso = _significant(isoforms, log2fc, pval)
    mat_iso = mat+iso

    if not non_temp and not mat and not iso:
        sys.exit("There aren't entries which meet these criteria")

    non_temp.sort(key=_abs_log2fc, reverse=True)
    ranked = sorted(mat+iso+non_temp, key=_abs_log2fc, reverse=True)
    uni_names = unique([_base_name(x[0]) for x in ranked])
    allowed = set(uni_names)

    # NOTE(review): these lists use a fixed |Log2FC| > 1 instead of the
    # user's threshold; kept as is - confirm whether this is intended.
    diff_non_templated = _significant(non_templated, 1, pval, allowed)
    diff_matures = _significant(matures, 1, pval, allowed)
    diff_isoforms = _significant(isoforms, 1, pval, allowed)

    diff_matures.sort(key=_abs_log2fc, reverse=True)
    diff_isoforms.sort(key=_abs_log2fc, reverse=True)
    diff_non_templated.sort(key=_abs_log2fc, reverse=True)

    return diff_matures, diff_isoforms, diff_non_templated, uni_names, non_temp, mat_iso


def _build_parser():
    """Create the command-line parser."""
    parser = argparse.ArgumentParser()
    parser.add_argument("-in", "--input", help="input table from Deseq2 or EdgeR", action="store")
    parser.add_argument("-p_value", "--pval", help="maximum p-value (or adjusted p-value)", action="store")
    parser.add_argument("-fc", "--log2fc", help="minimum absolute Log2FC", action="store")
    parser.add_argument("-top", "--top_mirnas", help="number of top entries to plot", action="store")
    parser.add_argument("-tool_dir", "--tool_directory", help="tool directory path", action="store")
    parser.add_argument("-statistic", "--stat", help="1 to filter on the p-value, anything else for the adjusted p-value", action="store")
    parser.add_argument("-diff_tool", "--tool", help="tool that produced the input: 1 for Deseq2, 2 for EdgeR", action="store")
    return parser


def _load_groups(path, tool, stat):
    """Read the table and split it into non-templated, matures and isoforms.

    Each returned entry is ``[name, log2fc, p]`` with the values still as
    strings. Rows with "NA" in a used column, and rows too short to hold
    the used columns (e.g. blank lines), are skipped.
    """
    fc_col, pvalue_col, padj_col, has_header = _TABLE_LAYOUT[tool]
    p_col = pvalue_col if stat == "1" else padj_col

    rows = [line.rstrip("\n").split("\t") for line in read(path, 0)]
    if has_header:
        del rows[0]

    non_templated, matures, isoforms = [], [], []
    for row in rows:
        if len(row) <= max(fc_col, p_col):
            continue
        if row[fc_col] == "NA" or row[p_col] == "NA":
            continue
        entry = [row[0], row[fc_col], row[p_col]]
        if "__" in row[0]:
            non_templated.append(entry)
        elif 'chr' in row[0].split("_")[-1]:
            matures.append(entry)
        else:
            isoforms.append(entry)
    return non_templated, matures, isoforms


##################################################################################################################################################################################################################################################################
def main():
    """Parse the arguments, create the plots in parallel and build the report."""
    starttime = time.time()

    parser = _build_parser()
    args = parser.parse_args()
    if args.tool not in _TABLE_LAYOUT:
        parser.error("-diff_tool must be 1 (Deseq2) or 2 (EdgeR)")

    l = Lock()
    number = int(args.top_mirnas)
    log2fc = float(args.log2fc)
    pval = float(args.pval)

    non_templated, matures, isoforms = _load_groups(args.input, args.tool, args.stat)
    diff_matures, diff_isoforms, diff_non_templated, names, non_temp, mat_iso = preproccess(
        non_templated, matures, isoforms, log2fc, pval)

    if non_templated:
        analysis = "2"
        p = [Process(target=top_diff, args=(non_temp, number, "nt", l)),
             Process(target=top_diff, args=(mat_iso, number, "t", l)),
             Process(target=top_scatter_non, args=(diff_matures, diff_isoforms, diff_non_templated, names, number))]
    else:
        analysis = "1"
        # top_diff gets the lock here too; without it the call had too few
        # arguments and the child process failed before saving tem.png.
        p = [Process(target=top_diff, args=(mat_iso, number, "t", l)),
             Process(target=top_scatter_tem, args=(diff_matures, diff_isoforms, names, number))]

    for proc in p:
        proc.start()
    for proc in p:
        proc.join()

    pdf_after_DE(analysis, args.top_mirnas)

    print('That took {} seconds'.format(time.time() - starttime))


# The guard keeps child processes from re-running the script when the
# module is imported by multiprocessing.
if __name__ == "__main__":
    main()