Mercurial > repos > jjohnson > cistrome_correlation
diff venn_diagram.xml @ 0:3d1097835b2f draft default tip
Imported from capsule None
author | jjohnson |
---|---|
date | Mon, 22 Sep 2014 11:54:41 -0400 |
parents | |
children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/venn_diagram.xml Mon Sep 22 11:54:41 2014 -0400
@@ -0,0 +1,219 @@
+<tool name="Venn Diagram" id="ceas_venn" version="0.1.0">
+  <!-- Galaxy wrapper around venn_diagram.py: draws a Venn diagram (PNG) of the overlaps
+       between two or three BED datasets and keeps the script output as a log dataset. -->
+  <description>Given 2 or 3 intervals, generate a venn diagram of their intersections</description>
+  <macros>
+    <import>corr_macros.xml</import>
+  </macros>
+  <expand macro="requirements" />
+  <!-- Cheetah command template. A label left blank falls back to the name of its dataset.
+       The redirection operator is written with XML entities because a bare ampersand
+       is not well-formed XML.
+       NOTE(review): $porportional is never passed to venn_diagram.py, and $bfile3 is
+       referenced even when the optional third dataset is unset; confirm the intended
+       behavior against venn_diagram.py. -->
+  <command>
+#if not bool(str($bf1label))
+#set $bf1label=$bfile1.name
+#end if
+#if not bool(str($bf2label))
+#set $bf2label=$bfile2.name
+#end if
+#if not bool(str($bf3label))
+#set $bf3label=$bfile3.name
+#end if
+    venn_diagram.py $bfile1 $bfile2 $bfile3 -H $height -W $width -t "$title" -l "$bf1label" -l "$bf2label" -l "$bf3label" &amp;&gt; $log;
+  </command>
+  <inputs>
+    <param ftype="interval" format="bed" name="bfile1" type="data" label="BED file 1(total number of lines in the 3 files should not exceed 100,000)"/>
+    <param name="bf1label" type="text" label="Bed file 1 label:" optional="true"/>
+    <param ftype="interval" format="bed" name="bfile2" type="data" label="BED file 2(total number of lines in the 3 files should not exceed 100,000)"/>
+    <param name="bf2label" type="text" label="Bed file 2 label:" optional="true"/>
+    <param ftype="interval" format="bed" name="bfile3" type="data" label="BED file 3(total number of lines in the 3 files should not exceed 100,000)" optional="true"/>
+    <param name="bf3label" type="text" label="Bed file 3 label:" optional="true"/>
+    <param name="height" type="integer" label="Pic Height" value="500">
+      <validator type="in_range" max="5000" min="500" message="Pic Height is out of range, Pic Height has to be between 500 to 5000" />
+    </param>
+    <param name="width" type="integer" label="Pic Width" value="500">
+      <validator type="in_range" max="5000" min="500" message="Pic Width is out of range, Pic Width has to be between 500 to 5000" />
+    </param>
+    <param name="title" type="text" label="Diagram title" value="Bed Venn Diagram"/>
+    <!-- The parameter name keeps its original spelling so saved jobs and the tests below still match. -->
+    <param name="porportional" type="boolean" label="Draw Proportional Diagrams" checked="yes"
+           truevalue="" falsevalue="-n" />
+  </inputs>
+  <outputs>
+    <data format="png" name="output" from_work_dir="venn_diagram.png"/>
+    <data format="txt" name="log" label="log file" />
+  </outputs>
+  <expand macro="stdio"/>
+  <!-- NOTE(review): the "shscript" config file below is not referenced by the command above. -->
+  <configfiles>
+    <configfile name="shscript">
+#!/bin/bash
+#import os
+
+#include source=$script_chars#
+
+#set $path = os.path.abspath($__app__.config.tool_path)
+
+##Note: the third bed file is optional, if it is not specified, we duplicate
+##the second file as the third argument
+THIRD=$bfile3
+if [ $bfile3 = "None" ]; then ##duplicate the second arg
+    THIRD=$bfile2
+    lines=`wc -l $bfile1 $bfile2 | tail -1 | awk '{print ${dollar}1}'`
+else
+    lines=`wc -l $bfile1 $bfile2 ${dollar}THIRD | tail -1 | awk '{print ${dollar}1}'`
+fi
+
+##check the format of the 3 bed files
+format1='passed' ##`$path/validation/fcfunc.py $bfile1`
+format2='passed' ##`$path/validation/fcfunc.py $bfile2`
+format3='passed' ##`$path/validation/fcfunc.py ${dollar}THIRD`
+
+#if not bool(str($bf1label))
+#set $bf1label=$bfile1.name
+#end if
+#if not bool(str($bf2label))
+#set $bf2label=$bfile2.name
+#end if
+#if not bool(str($bf3label))
+#set $bf3label=$bfile3.name
+#end if
+
+#set $tlablelen1=len(str($bf1label))
+#set $tlablelen2=len(str($bf2label))
+#set $tlablelen3=len(str($bf3label))
+#set $tlablelen4=len(str($title))
+
+##Checks run in order; the first one that fails prints its message and exits.
+if [[ ${dollar}lines -gt 100000 ]];then
+    echo "Total lines of the files exceed the limit of 100000 lines!" ${gt}${ad}2;
+    exit;
+elif [[ ${dollar}format1 != "passed" ]];then
+    echo "BED file 1: ${dollar}format1" ${gt}${ad}2;
+    exit;
+elif [[ ${dollar}format2 != "passed" ]];then
+    echo "BED file 2: ${dollar}format2" ${gt}${ad}2;
+    exit;
+elif [[ ${dollar}format3 != "passed" ]];then
+    echo "BED file 3: ${dollar}format3" ${gt}${ad}2;
+    exit;
+elif [[ $tlablelen1 -gt 255 ]];then
+    echo "Bed file 1 label is too long! 255 characters is at most!" ${gt}${ad}2
+    exit;
+elif [[ $tlablelen2 -gt 255 ]];then
+    echo "Bed file 2 label is too long! 255 characters is at most!" ${gt}${ad}2
+    exit;
+elif [[ $tlablelen3 -gt 255 ]];then
+    echo "Bed file 3 label is too long! 255 characters is at most!" ${gt}${ad}2
+    exit;
+elif [[ $tlablelen4 -gt 255 ]];then
+    echo "Diagram title is too long! 255 characters is at most!" ${gt}${ad}2
+    exit;
+else
+##Use THIRD so that an unset third dataset falls back to the second one, as noted above.
+    venn_diagram.py $bfile1 $bfile2 ${dollar}THIRD -H $height -W $width -t "$title" -l "$bf1label" -l "$bf2label" -l "$bf3label" ${gt}${ad} $log;
+    cp venn_diagram.png $output;
+fi
+    </configfile>
+  </configfiles>
+  <tests>
+    <test maxseconds="3600" name="VennDiagram_1">
+      <param name="bfile1" value="bedfile1.bed" />
+      <param name="bf1label" value="BedFile1" />
+      <param name="bfile2" value="bedfile2.bed" />
+      <param name="bf2label" value="BedFile2" />
+      <param name="bfile3" value="bedfile3.bed" />
+      <param name="bf3label" value="BedFile3" />
+      <param name="height" value="500" />
+      <param name="width" value="500" />
+      <param name="title" value="BedVennDiagram" />
+      <param name="porportional" value="" />
+      <output name="output" file="venndiagram_1/venndiagram_1.png" lines_diff="40" />
+      <output name="log" file="venndiagram_1/venndiagram_1.log" lines_diff="200" />
+    </test>
+    <test maxseconds="3600" name="VennDiagram_2">
+      <param name="bfile1" value="bedfile1.bed" />
+      <param name="bf1label" value="BedFile1" />
+      <param name="bfile2" value="bedfile2.bed" />
+      <param name="bf2label" value="BedFile2" />
+      <param name="bfile3" value="bedfile3.bed" />
+      <param name="bf3label" value="BedFile3" />
+      <param name="height" value="5000" />
+      <param name="width" value="5000" />
+      <param name="title" value="BedVennDiagram" />
+      <param name="porportional" value="" />
+      <output name="output" file="venndiagram_2/venndiagram_2.png" lines_diff="40" />
+      <output name="log" file="venndiagram_2/venndiagram_2.log" lines_diff="200" />
+    </test>
+  </tests>
+  <help>
+This tool generates a venn diagram of the intersection of multiple intervals
+files. The original code was written by Jacqueline Wentz and revised by
+Tao Liu. It will calculate how many regions are overlapped between BED
+files and use the Google Chart API to draw the final figure.
+
+.. class:: warningmark
+
+**CAUTION:** When three data sets are used and their sizes differ a
+lot, the figure may not be correct, but the numbers are correct.
+
+**CAUTION:** The maximum number of lines in all the input files should
+not exceed 100,000.
+
+.. class:: warningmark
+
+**NEED IMPROVEMENT**
+
+-----
+
+**Parameters**
+
+- **BED file 1 and 2** are the two BED files to be used to calculate
+  the overlap.
+- **BED file 3** is the third BED file to be used. It's optional.
+- **BED file labels** are the names of the datasets displayed on the diagram;
+  leave blank to use the dataset names.
+- **Pic Height** is the height of the final image.
+- **Pic Width** is the width of the final image.
+- **Diagram title** is the title of the final image.
+
+-----
+
+**Outputs**
+
+- **PNG file** is the Venn diagram plot. The numbers of overlapped
+  regions are included in the figure.
+- **LOG file** is the job log. If you see errors, please attach this in
+  the bug report.
+
+-----
+
+**summary**
+
+The regions in the BED files are clustered first.
+For example, suppose we have 5 regions:
+
+bed A:
+(200, 900)
+(1000, 1200)
+
+bed B:
+(100, 300)
+(700, 1100)
+(1400, 1500)
+
+Since they have overlaps, it will cluster them into 2 regions:
+(100, 1200) (1400, 1500), and see whether each region
+(of the 2 regions) is included in bed A or B. So,
+
+(100, 1200) is included in bed A, B
+
+(1400,1500) is included in bed B
+
+Then we draw the Venn diagram.
+  </help>
+
+</tool>