changeset 0:c6e48c6ac606 draft default tip

planemo upload for repository https://github.com/morinlab/tools-morinlab/tree/master/tools/calc_cnsegments commit 4ef2d91b7c1686a2696b92fe538d4aec51d05e40-dirty
author morinlab
date Tue, 11 Oct 2016 14:16:32 -0400
parents
children
files calc_cnsegements_titan.pl calc_cnsegements_titan.xml
diffstat 2 files changed, 280 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/calc_cnsegements_titan.pl	Tue Oct 11 14:16:32 2016 -0400
@@ -0,0 +1,238 @@
+#!/usr/bin/perl -w
+=head1 NAME        
+
+createTITANsegmentfiles.pl
+
+-head1 SYNOPSIS
+
+=head1 OPTIONS 
+    -id <string>
+    -outfile|o <string>
+    -symmetric|s <boolean>  {1 - true; 0 - false}; Default: 1
+    -outIGV|igv <string> Required: 
+    -infile|i <string>   Required: TITAN *titan.txt file
+  
+=head1 DESCRIPTION
+
+=head1 CONTACT
+
+Gavin Ha <gha@bccrc.ca>
+
+=cut
+
+use strict;
+use List::Util qw[min max];
+use File::Basename;
+use Getopt::Long;
+
+
+my $version = '0.1.3';
+
+sub usage () {
+    exec('perldoc', $0);
+    exit;
+}
+
+my ($id, $outfile, $symmetric, $outIGV, $infile, $calls, $help);
+$id=0;
+$calls = 1;
+$symmetric = 1;
+GetOptions (
+		'id=s' => \$id,
+	    'infile|i=s' => \$infile,
+	    'symmetric|s=i' => \$symmetric,
+        'outfile|o=s' => \$outfile,
+        'outIGV|igv=s' => \$outIGV,
+        'help|?' => \$help
+            );
+
+if($help) {
+    &usage();
+}
+print "Parameters:\nid=$id\ninfile=$infile\noutfile=$outfile\noutIGV=$outIGV\n";
+
+my ($name,$path,$suffix) = fileparse($infile);
+#my @jnk;
+#($id,@jnk) = split(/\_/,$name) if ~defined($id);
+my $nameOut = $id;
+
+open OUTFILE, ">$outfile" || die("Cannot write to $outfile\n");
+open IGV, ">$outIGV" || die("Cannot write to $outIGV\n");
+print OUTFILE "Sample\tChromosome\tStart_Position(bp)\tEnd_Position(bp)\tLength(bp)\tMedian_Ratio\tMedian_logR\tTITAN_state\tTITAN_call\tCopy_Number\tMinorCN\tMajorCN\tClonal_Cluster\tClonal_Frequency\n";
+print IGV "sample\tchr\tstart\tend\tnum.mark\tmedian\n";
+open(SEGFILE, $infile) || die("Can't open $infile!\n");
+my $header = <SEGFILE>;
+my $line = <SEGFILE>; chomp($line);
+my($chr, $start, $ref, $nRef, $N, $ratio, $logR, $cn, $state, $call, $clust, $clustFreq, @rest) = split(/\t/,$line);
+my $end = $start;
+my @totalRatio=();
+my @totalLogR=();
+push(@totalRatio,max($ratio,1-$ratio));
+push(@totalLogR,$logR);
+while($line=<SEGFILE>) {
+chomp($line);
+my ($chrS, $startS, $refS, $nRefS, $NS, $ratioS, $logRS, $cnS, $stateS, $callS, $clustS, $clustFreqS, @rest) = split(/\t/,$line);	
+if ($chrS ne $chr || $callS ne $call || $clustS ne $clust){
+	my $medianRatio = max($ratio,1-$ratio); #1 position only
+	my $medianLogR = $logR;
+	$medianRatio = median(@totalRatio) if (($end-$start+1)>1); #more than 1 position
+	$medianLogR = median(@totalLogR) if (($end-$start+1)>1);
+	my @majmin;
+	if ($symmetric == 0){
+		@majmin = &getMinorMajorCN($state);
+	}elsif ($symmetric == 1){
+		@majmin = &getMinorMajorCNSymmetric($state);
+	}
+	my $output = $nameOut . "\t" . $chr . "\t" . $start . "\t" . $end . "\t" . ($end-$start+1). "\t" . sprintf("%.2f",$medianRatio) . "\t" . sprintf("%.2f",$medianLogR);	
+	$output = $output . "\t" .  $state . "\t" . $call if ($calls);
+	$output = $output . "\t" . $cn . "\t" . $majmin[0] . "\t" . $majmin[1] . "\t" . $clust . "\t" . $clustFreq;
+	print OUTFILE $output . "\n";
+	my $output2 = $nameOut . "\t" . $chr . "\t" . $start . "\t" . $end . "\t" . scalar(@totalRatio). "\t" . sprintf("%.2f",$medianLogR);
+	print IGV $output2 . "\n";
+	#reset
+	@totalRatio = ();
+	@totalLogR = ();
+	$start = $startS; 
+}else{
+	
+	#push(@totalRatio,max($ratioS,1-$ratioS));
+	#assign current state to previous variables
+	#new end, but start still the same
+}
+($chr, $end, $ref, $nRef, $N, $ratio, $logR, $cn, $state, $call, $clust, $clustFreq) = ($chrS, $startS, $refS, $nRefS, $NS, $ratioS, $logRS, $cnS, $stateS, $callS, $clustS, $clustFreqS);
+push(@totalRatio,max($ratioS,1-$ratioS));
+push(@totalLogR,$logRS);
+}
+
+#final output
+my $medianRatio = max($ratio,1-$ratio); #1 position only
+$medianRatio = median(@totalRatio) if (($end-$start+1)>1); #more than 1 position
+my $medianLogR = $logR;
+$medianLogR = median(@totalLogR) if (($end-$start+1)>1);
+my $output = $nameOut . "\t" . $chr . "\t" . $start . "\t" . $end . "\t" . ($end-$start+1). "\t" . sprintf("%.4f",$medianRatio) . "\t" . sprintf("%.2f",$medianLogR);	
+my @majmin;
+if ($symmetric == 0){
+		@majmin = &getMinorMajorCN($state);
+}elsif ($symmetric == 1){
+		@majmin = &getMinorMajorCNSymmetric($state);
+}
+$output = $output . "\t" .  $state . "\t" . $call if ($calls);
+$output = $output . "\t" . $cn . "\t" . $majmin[0] . "\t" . $majmin[1] . "\t" . $clust . "\t" . $clustFreq;
+print OUTFILE $output . "\n";
+my $output2 = $nameOut . "\t" . $chr . "\t" . $start . "\t" . $end . "\t" . scalar(@totalRatio). "\t" . sprintf("%.2f",$medianLogR);
+print IGV $output2 . "\n";
+close OUTFILE;
+close IGV;
+#my $returnCode = `sort -k2,2n -k3,3n $outfile\.tmp > $outfile; rm $outfile\.tmp;`;
+close SEGFILE;
+
+
+sub median { 
+	my (@array_ref) = @_; 
+	my $count = scalar @array_ref; 
+	# Sort a COPY of the array, leaving the original untouched 
+	my @array = sort { $a <=> $b } @array_ref; 
+	if ($count % 2) { 
+		return $array[int($count/2)]; 
+	} else { 
+		return ($array[$count/2] + $array[$count/2 - 1]) / 2; 
+	} 
+} 
+
+
+sub getMinorMajorCN {
+	my ($state) = @_;
+	my @majmin; $majmin[0]="NA"; $majmin[1]="NA";
+	if ($state==0){
+		$majmin[0] = 0; $majmin[1] = 0;
+	}elsif ($state==1 || $state==2){
+		$majmin[0] = 0; $majmin[1] = 1;
+	}elsif($state==4){
+		$majmin[0] = 1; $majmin[1] = 1;
+	}elsif ($state==3 || $state==5){
+		$majmin[0] = 0; $majmin[1] = 2;
+	}elsif ($state==6 || $state==9){
+		$majmin[0] = 0; $majmin[1] = 3;
+	}elsif ($state==10 || $state==14){
+		$majmin[0] = 0; $majmin[1] = 4;
+	}elsif ($state==15 || $state==20){
+		$majmin[0] = 0; $majmin[1] = 5;
+	}elsif ($state==7 || $state==8){
+		$majmin[0] = 1; $majmin[1] = 2;
+	}elsif ($state==11 || $state==13){
+		$majmin[0] = 1; $majmin[1] = 3;
+	}elsif ($state==16 || $state==19){
+		$majmin[0] = 1; $majmin[1] = 4;
+	}elsif ($state==17 || $state==18){
+		$majmin[0] = 2; $majmin[1] = 3;
+	}elsif ($state==12){
+		$majmin[0] = 2; $majmin[1] = 2;
+	}
+	#my $retVal = ($majmin[0],$majmin[1]);	
+	return @majmin;
+}
+
+sub getMinorMajorCNSymmetric {
+	my ($state) = @_;
+	my @majmin; $majmin[0]="NA"; $majmin[1]="NA";
+	if ($state==0){
+		$majmin[0] = 0; $majmin[1] = 0;
+	}elsif ($state==1){
+		$majmin[0] = 0; $majmin[1] = 1;
+	}elsif($state==2){
+		$majmin[0] = 0; $majmin[1] = 2;
+	}elsif ($state==3){
+		$majmin[0] = 1; $majmin[1] = 1;
+	}elsif ($state==4){
+		$majmin[0] = 0; $majmin[1] = 3;
+	}elsif ($state==5){
+		$majmin[0] = 1; $majmin[1] = 2;
+	}elsif ($state==6){
+		$majmin[0] = 0; $majmin[1] = 4;
+	}elsif ($state==7){
+		$majmin[0] = 1; $majmin[1] = 3;
+	}elsif ($state==8){
+		$majmin[0] = 2; $majmin[1] = 2;
+	}elsif ($state==9){
+		$majmin[0] = 0; $majmin[1] = 5;
+	}elsif ($state==10){
+		$majmin[0] = 1; $majmin[1] = 4;
+	}elsif ($state==11){
+		$majmin[0] = 2; $majmin[1] = 3;
+	}elsif ($state==12){
+		$majmin[0] = 0; $majmin[1] = 6;
+	}elsif ($state==13){
+		$majmin[0] = 1; $majmin[1] = 5;
+	}elsif ($state==14){
+		$majmin[0] = 2; $majmin[1] = 4;
+	}elsif ($state==15){
+		$majmin[0] = 3; $majmin[1] = 3;
+	}elsif ($state==16){
+		$majmin[0] = 0; $majmin[1] = 7;
+	}elsif ($state==17){
+		$majmin[0] = 1; $majmin[1] = 6;
+	}elsif ($state==18){
+		$majmin[0] = 2; $majmin[1] = 5;
+	}elsif ($state==19){
+		$majmin[0] = 3; $majmin[1] = 4;
+	}elsif ($state==20){
+		$majmin[0] = 0; $majmin[1] = 8;
+	}elsif ($state==21){
+		$majmin[0] = 1; $majmin[1] = 7;
+	}elsif ($state==22){
+		$majmin[0] = 2; $majmin[1] = 6;
+	}elsif ($state==23){
+		$majmin[0] = 3; $majmin[1] = 5;
+	}elsif ($state==24){
+		$majmin[0] = 4; $majmin[1] = 4;
+	}
+	#my $retVal = ($majmin[0],$majmin[1]);	
+	return @majmin;
+}
+
+
+#Read more: http://wiki.answers.com/Q/How_can_you_calculate_the_average_and_median_in_perl_by_subroutine#ixzz1LcMWdXEE
+#sub median{
+#	my @a = sort @_;
+#	return ($a[$#a/2] + $a[@a/2]) / 2;
+#}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/calc_cnsegements_titan.xml	Tue Oct 11 14:16:32 2016 -0400
@@ -0,0 +1,42 @@
+<tool id="calc_cnsegments" name="calc_cnsegements" version="0.0.1">
+  <description>
+    Calculate Copy Number Segments from TITAN output
+  </description>
+  <command>
+    <![CDATA[
+    perl $__tool_directory__/calc_cnsegements_titan.pl
+
+      #if $sampleid_source.sampleid_selector == "bamfile":
+        -id \$(basename $sampleid_source.sample_id | sed 's/.bam$//g' )
+      #else:
+        -id $sampleid_source.sample_id
+      #end if
+
+     -o $outfile
+     -s $symmetric
+     -igv $outIGV
+     -i $infile
+
+    ]]>
+  </command>
+  <inputs>
+    <conditional name="sampleid_source">
+      <param label="Choose the source to open the Sample Id" name="sampleid_selector" type="select">
+        <option value="bamfile">Bam File Name</option>
+        <option value="manual">Manual</option>
+      </param>
+      <when value="bamfile">
+        <param type="data" format="bam" name="sample_id" label="Sequence Alignment File"/>
+      </when>
+      <when value="manual">
+        <param name="sample_id" type="text" label="Sample ID (Name)"/>
+      </when>
+    </conditional>
+    <param type="data" format="txt" name="infile" label="SNP Summary Information (TITAN Output)"/>
+    <param type="boolean" name="symmetric" label="Symmetric" checked="yes" truevalue="1" falsevalue="0"/>
+  </inputs>
+  <outputs>
+    <data format="txt" name="outfile"/>
+    <data format="seg" name="outIGV"/>
+  </outputs>
+</tool>