changeset 0:db556c94a101 draft default tip

Uploaded
author elixir-it
date Tue, 27 Oct 2020 14:48:56 +0000
parents
children
files bubble/MatData.csv bubble/VirData.csv bubble/bubble.xml bubble/bubbleplot.R bubble/plotMdata.pl
diffstat 5 files changed, 121 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/bubble/MatData.csv	Tue Oct 27 14:48:56 2020 +0000
@@ -0,0 +1,6 @@
+"241_C|T"	"514_T|C"	"1059_C|T"	"1397_G|A"	"1440_G|A"	"1605_ATG|..."	"2416_C|T"	"2480_A|G"	"2558_C|T"	"2891_G|A"	"3037_C|T"	"8782_C|T"	"9477_T|A"	"10097_G|A"	"11083_G|T"	"11916_C|T"	"14408_C|T"	"14805_C|T"	"15324_C|T"	"17247_T|C"	"17747_C|T"	"17858_A|G"	"18060_C|T"	"18877_C|T"	"18998_C|T"	"20268_A|G"	"23403_A|G"	"23731_C|T"	"24034_C|T"	"25429_G|T"	"25563_G|T"	"25979_G|T"	"26144_G|T"	"27046_C|T"	"27964_C|T"	"28144_T|C"	"28311_C|T"	"28657_C|T"	"28688_T|C"	"28851_G|T"	"28854_C|T"	"28863_C|T"	"28881_GGG|AAC"	"29540_G|A"	"29553_G|A"	"29742_G|T"	
+"C_1"	0.0006075334	0	0	0	0	0	0	0	0	0	0.0018226002	0.9933171324	0.0990279465	0	0.0127582017	0	0	0.0990279465	0	0	0.6616038882	0.6719319563	0.6810449575	0.0012150668	0	0.0018226002	0.0030376671	0	0.0650060753	0	0	0.0978128797	0	0	0	0.9951397327	0.0407047388	0.0984204131	0	0	0	0.0978128797	0	0	0	0
+"C_2"	0.0039241334	0.0843688685	0.0052321779	0.0987573578	0.1661216481	0.2282537606	0	0	0.0006540222	0.1523871812	0.0039241334	0.013734467	0.0019620667	0	0.221059516	0	0.0045781557	0.0052321779	0.0078482668	0.0032701112	0	0	0.0006540222	0.0006540222	0	0.0006540222	0.0019620667	0	0.0019620667	0.0006540222	0.0013080445	0.0013080445	0.0412034009	0	0.0202746893	0.0065402224	0.0529758012	0.0013080445	0.096795291	0.0778286462	0.0405493787	0.0013080445	0.0019620667	0	0	0.0869849575
+"C_3"	0.0062959077	0	0.0010493179	0	0	0	0	0.3714585519	0.4092339979	0	0.0020986359	0.0010493179	0	0	0.963273872	0	0.0010493179	0.9685204617	0	0.4050367261	0.0010493179	0.0010493179	0	0	0	0	0.0041972718	0	0	0	0.0010493179	0	0.9989506821	0	0	0	0.0041972718	0	0	0.0031479538	0.0010493179	0	0.0010493179	0	0	0
+"C_4"	0.9727448143	0.0002411963	0.0002411963	0.0004823927	0.0002411963	0	0	0	0	0	0.9917993247	0.0009647853	0	0.0299083454	0.010853835	0	0.9901109503	0.003617945	0.0648818138	0.0002411963	0	0	0.0002411963	0.000723589	0	0.1198745779	0.9966232513	0.0308731307	0.0004823927	0.0419681621	0	0.0002411963	0.0002411963	0.0660877955	0.0004823927	0	0.0002411963	0.000723589	0	0.000723589	0.0243608297	0	0.4558610709	0	0	0.001447178
+"C_5"	0.95703125	0	0.8295454545	0	0	0	0.0628551136	0	0	0	0.9943181818	0.0003551136	0	0	0.0102982955	0.0600142045	0.9957386364	0	0.0007102273	0	0	0	0.0003551136	0.0802556818	0.0440340909	0	0.9968039773	0.0003551136	0	0	0.9928977273	0	0	0	0.0454545455	0.0003551136	0.0003551136	0	0	0.0003551136	0.0021306818	0.0003551136	0.0007102273	0.0440340909	0.0838068182	0.0003551136
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/bubble/VirData.csv	Tue Oct 27 14:48:56 2020 +0000
@@ -0,0 +1,6 @@
+"x"
+"1" 2816
+"2" 1529
+"3" 1646
+"4" 4146
+"5" 953
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/bubble/bubble.xml	Tue Oct 27 14:48:56 2020 +0000
@@ -0,0 +1,27 @@
+<tool id="BubblePlot" name="Bubbleplot" version="1">
+  <description> </description>
+  <requirements>
+    <requirement type="package" >perl</requirement>
+    <requirement type="package" >r-base</requirement>
+  </requirements>
+  <command> <![CDATA[
+	ln -s $__tool_directory__/bubbleplot.R 2>$log &&
+	ln -s $__tool_directory__/MatData.csv 2>>$log &&
+	ln -s $__tool_directory__/VirData.csv 2>>$log &&
+        perl $__tool_directory__/plotMdata.pl $outfile_tabular $plot1 $plot2 ${" ".join(map(str, $input_file))} 2>>$log
+          ]]>
+  </command>
+  <inputs>
+	<param name="input_file" format="txt" type="data" multiple="true" label="show-snps tabular ouput" help="Clustering of SARS-CoV-2 isolates based on high-freq alleles" />
+  </inputs>
+  <outputs>
+	<data format="txt" name="outfile_tabular" label="AF prevalence in SARS-CoV-2 types, as defined in Chiara et tabular format " />
+	<data format="pdf" name="plot1" label="Bubbleplot " />
+	<data format="pdf" name="plot2" label="Heatmap " />
+	<data format="txt" name="log" label="${tool.name} on ${on_string}: log file "/>
+  </outputs>
+  <help>
+  </help>
+  <citations>
+  </citations>
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/bubble/bubbleplot.R	Tue Oct 27 14:48:56 2020 +0000
@@ -0,0 +1,43 @@
+#!/usr/bin/env Rscript
+args = commandArgs(trailingOnly=TRUE)
+if (length(args)!=3) {
+  st_sentence=paste("This script requires  exactly 3  arguments",length(args),"supplied\n")
+  stop(sentence, call.=FALSE)
+}
+
+bubbleplot<-function(MatData,VirData,palette,magn,main,space)
+{
+        COL=ncol(MatData)
+        ROW=nrow(MatData)
+        Mcopy=rep(1:COL,ROW)
+        Mfactor=rep(1:ROW,rep( COL,ROW ))
+
+        COLOR=rep(palette,rep(COL,ROW))
+        CEX=(log10(t((MatData*100)+1)))*magn
+
+
+        par(mar=c(8,4,4,0),fig=c(0,0.8,0,1))
+        plot(Mcopy,Mfactor,cex=0.1,pch=20,xaxt="n",yaxt="n",xlab="",ylab="",main=main,yaxs="r");
+        #grid(COL*ROW)
+        abline(h=1:ROW)
+        abline(v=1:COL)
+        lines(Mcopy,Mfactor,cex=CEX,col=COLOR,pch=20,type="p")
+
+        axis(1,at=1:COL,labels=colnames(MatData),las=2)
+        axis(2,at=(1:ROW),labels=rownames(MatData),las=2)
+
+        par(fig=c(0.8,1,0,1),new=TRUE,mar=c(8,0,4,1))
+        barplot(log10(VirData+1),horiz=T,las=2,space=space,yaxs="i",main="Log(N genomes)")
+}
+MatData=read.table("MatData.csv",header=T,row.names=1,check.names=F);
+VirData=read.table("VirData.csv",header=T,row.names=1);
+VirData=as.vector(VirData[,1])
+add_data=read.table(args[1],header=T,row.names=1,check.names=F);
+VirData=c(VirData,rep(1,nrow(add_data) ))
+MatData=rbind(MatData,add_data);
+pdf(args[2],width=1200,height=1600)
+bubbleplot(MatData,VirData,c(colors()[c(90,60,150,120,30)],topo.colors(nrow(add_data))),3,main="",space=1)
+dev.off()
+pdf(args[3],width=1600,height=1600)
+heatmap(cor(t(MatData)),col=blues9);
+dev.off()
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/bubble/plotMdata.pl	Tue Oct 27 14:48:56 2020 +0000
@@ -0,0 +1,39 @@
+$ofile=shift;
+$plot1=shift;
+$plot2=shift;
+@genomes=();
+foreach $var_file (@ARGV)
+{
+	open(IN,$var_file);
+	$genome="genome";
+	while(<IN>)
+	{
+		($pos,$ref,$alt,$gen)=(split(/\s+/))[1,2,3,-1];
+		next unless $ref=~/[ACTG]/ && $alt=~/[ACTG]/;
+		#print $pos $ref $alt\n;
+		if ($genome eq "genome")
+		{
+			push(@genomes,$gen);
+			$genome=$gen;
+		}
+		$is_present{"$pos\_$ref|$alt"}{$genome}=1;
+	}
+}
+
+@test=qw(241_C|T 514_T|C 1059_C|T 1397_G|A 1440_G|A 1605_ATG|... 2416_C|T 2480_A|G 2558_C|T 2891_G|A 3037_C|T 8782_C|T 9477_T|A 10097_G|A 11083_G|T 11916_C|T 14408_C|T 14805_C|T 15324_C|T 17247_T|C 17747_C|T 17858_A|G 18060_C|T 18877_C|T 18998_C|T 20268_A|G 23403_A|G 23731_C|T 24034_C|T 25429_G|T 25563_G|T 25979_G|T 26144_G|T 27046_C|T 27964_C|T 28144_T|C 28311_C|T 28657_C|T 28688_T|C 28851_G|T 28854_C|T 28863_C|T 28881_GGG|AAC 29540_G|A 29553_G|A 29742_G|T);
+
+open(OUT,">$ofile");
+print OUT " @test\n";
+
+foreach $genome (@genomes)
+{
+	$ostring="$genome ";
+	foreach $t (@test)
+	{
+		$val=$is_present{$t}{$genome} ? 1 : 0;
+		$ostring.="$val ";
+	}
+	chop($ostring);
+	print OUT "$ostring\n";
+}
+system("Rscript --vanilla bubbleplot.R $ofile $plot1 $plot2")==0||die("no plot");