# HG changeset patch # User morinlab # Date 1476209792 14400 # Node ID c6e48c6ac6069858257ebb64ec379e39d51c20af planemo upload for repository https://github.com/morinlab/tools-morinlab/tree/master/tools/calc_cnsegments commit 4ef2d91b7c1686a2696b92fe538d4aec51d05e40-dirty diff -r 000000000000 -r c6e48c6ac606 calc_cnsegements_titan.pl --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/calc_cnsegements_titan.pl Tue Oct 11 14:16:32 2016 -0400 @@ -0,0 +1,238 @@ +#!/usr/bin/perl -w +=head1 NAME + +createTITANsegmentfiles.pl + +-head1 SYNOPSIS + +=head1 OPTIONS + -id + -outfile|o + -symmetric|s {1 - true; 0 - false}; Default: 1 + -outIGV|igv Required: + -infile|i Required: TITAN *titan.txt file + +=head1 DESCRIPTION + +=head1 CONTACT + +Gavin Ha + +=cut + +use strict; +use List::Util qw[min max]; +use File::Basename; +use Getopt::Long; + + +my $version = '0.1.3'; + +sub usage () { + exec('perldoc', $0); + exit; +} + +my ($id, $outfile, $symmetric, $outIGV, $infile, $calls, $help); +$id=0; +$calls = 1; +$symmetric = 1; +GetOptions ( + 'id=s' => \$id, + 'infile|i=s' => \$infile, + 'symmetric|s=i' => \$symmetric, + 'outfile|o=s' => \$outfile, + 'outIGV|igv=s' => \$outIGV, + 'help|?' => \$help + ); + +if($help) { + &usage(); +} +print "Parameters:\nid=$id\ninfile=$infile\noutfile=$outfile\noutIGV=$outIGV\n"; + +my ($name,$path,$suffix) = fileparse($infile); +#my @jnk; +#($id,@jnk) = split(/\_/,$name) if ~defined($id); +my $nameOut = $id; + +open OUTFILE, ">$outfile" || die("Cannot write to $outfile\n"); +open IGV, ">$outIGV" || die("Cannot write to $outIGV\n"); +print OUTFILE "Sample\tChromosome\tStart_Position(bp)\tEnd_Position(bp)\tLength(bp)\tMedian_Ratio\tMedian_logR\tTITAN_state\tTITAN_call\tCopy_Number\tMinorCN\tMajorCN\tClonal_Cluster\tClonal_Frequency\n"; +print IGV "sample\tchr\tstart\tend\tnum.mark\tmedian\n"; +open(SEGFILE, $infile) || die("Can't open $infile!\n"); +my $header = ; +my $line = ; chomp($line); +my($chr, $start, $ref, $nRef, $N, $ratio, $logR, $cn, $state, $call, $clust, $clustFreq, @rest) = split(/\t/,$line); +my $end = $start; +my @totalRatio=(); +my @totalLogR=(); +push(@totalRatio,max($ratio,1-$ratio)); +push(@totalLogR,$logR); +while($line=) { +chomp($line); +my ($chrS, $startS, $refS, $nRefS, $NS, $ratioS, $logRS, $cnS, $stateS, $callS, $clustS, $clustFreqS, @rest) = split(/\t/,$line); +if ($chrS ne $chr || $callS ne $call || $clustS ne $clust){ + my $medianRatio = max($ratio,1-$ratio); #1 position only + my $medianLogR = $logR; + $medianRatio = median(@totalRatio) if (($end-$start+1)>1); #more than 1 position + $medianLogR = median(@totalLogR) if (($end-$start+1)>1); + my @majmin; + if ($symmetric == 0){ + @majmin = &getMinorMajorCN($state); + }elsif ($symmetric == 1){ + @majmin = &getMinorMajorCNSymmetric($state); + } + my $output = $nameOut . "\t" . $chr . "\t" . $start . "\t" . $end . "\t" . ($end-$start+1). "\t" . sprintf("%.2f",$medianRatio) . "\t" . sprintf("%.2f",$medianLogR); + $output = $output . "\t" . $state . "\t" . $call if ($calls); + $output = $output . "\t" . $cn . "\t" . $majmin[0] . "\t" . $majmin[1] . "\t" . $clust . "\t" . $clustFreq; + print OUTFILE $output . "\n"; + my $output2 = $nameOut . "\t" . $chr . "\t" . $start . "\t" . $end . "\t" . scalar(@totalRatio). "\t" . sprintf("%.2f",$medianLogR); + print IGV $output2 . "\n"; + #reset + @totalRatio = (); + @totalLogR = (); + $start = $startS; +}else{ + + #push(@totalRatio,max($ratioS,1-$ratioS)); + #assign current state to previous variables + #new end, but start still the same +} +($chr, $end, $ref, $nRef, $N, $ratio, $logR, $cn, $state, $call, $clust, $clustFreq) = ($chrS, $startS, $refS, $nRefS, $NS, $ratioS, $logRS, $cnS, $stateS, $callS, $clustS, $clustFreqS); +push(@totalRatio,max($ratioS,1-$ratioS)); +push(@totalLogR,$logRS); +} + +#final output +my $medianRatio = max($ratio,1-$ratio); #1 position only +$medianRatio = median(@totalRatio) if (($end-$start+1)>1); #more than 1 position +my $medianLogR = $logR; +$medianLogR = median(@totalLogR) if (($end-$start+1)>1); +my $output = $nameOut . "\t" . $chr . "\t" . $start . "\t" . $end . "\t" . ($end-$start+1). "\t" . sprintf("%.4f",$medianRatio) . "\t" . sprintf("%.2f",$medianLogR); +my @majmin; +if ($symmetric == 0){ + @majmin = &getMinorMajorCN($state); +}elsif ($symmetric == 1){ + @majmin = &getMinorMajorCNSymmetric($state); +} +$output = $output . "\t" . $state . "\t" . $call if ($calls); +$output = $output . "\t" . $cn . "\t" . $majmin[0] . "\t" . $majmin[1] . "\t" . $clust . "\t" . $clustFreq; +print OUTFILE $output . "\n"; +my $output2 = $nameOut . "\t" . $chr . "\t" . $start . "\t" . $end . "\t" . scalar(@totalRatio). "\t" . sprintf("%.2f",$medianLogR); +print IGV $output2 . "\n"; +close OUTFILE; +close IGV; +#my $returnCode = `sort -k2,2n -k3,3n $outfile\.tmp > $outfile; rm $outfile\.tmp;`; +close SEGFILE; + + +sub median { + my (@array_ref) = @_; + my $count = scalar @array_ref; + # Sort a COPY of the array, leaving the original untouched + my @array = sort { $a <=> $b } @array_ref; + if ($count % 2) { + return $array[int($count/2)]; + } else { + return ($array[$count/2] + $array[$count/2 - 1]) / 2; + } +} + + +sub getMinorMajorCN { + my ($state) = @_; + my @majmin; $majmin[0]="NA"; $majmin[1]="NA"; + if ($state==0){ + $majmin[0] = 0; $majmin[1] = 0; + }elsif ($state==1 || $state==2){ + $majmin[0] = 0; $majmin[1] = 1; + }elsif($state==4){ + $majmin[0] = 1; $majmin[1] = 1; + }elsif ($state==3 || $state==5){ + $majmin[0] = 0; $majmin[1] = 2; + }elsif ($state==6 || $state==9){ + $majmin[0] = 0; $majmin[1] = 3; + }elsif ($state==10 || $state==14){ + $majmin[0] = 0; $majmin[1] = 4; + }elsif ($state==15 || $state==20){ + $majmin[0] = 0; $majmin[1] = 5; + }elsif ($state==7 || $state==8){ + $majmin[0] = 1; $majmin[1] = 2; + }elsif ($state==11 || $state==13){ + $majmin[0] = 1; $majmin[1] = 3; + }elsif ($state==16 || $state==19){ + $majmin[0] = 1; $majmin[1] = 4; + }elsif ($state==17 || $state==18){ + $majmin[0] = 2; $majmin[1] = 3; + }elsif ($state==12){ + $majmin[0] = 2; $majmin[1] = 2; + } + #my $retVal = ($majmin[0],$majmin[1]); + return @majmin; +} + +sub getMinorMajorCNSymmetric { + my ($state) = @_; + my @majmin; $majmin[0]="NA"; $majmin[1]="NA"; + if ($state==0){ + $majmin[0] = 0; $majmin[1] = 0; + }elsif ($state==1){ + $majmin[0] = 0; $majmin[1] = 1; + }elsif($state==2){ + $majmin[0] = 0; $majmin[1] = 2; + }elsif ($state==3){ + $majmin[0] = 1; $majmin[1] = 1; + }elsif ($state==4){ + $majmin[0] = 0; $majmin[1] = 3; + }elsif ($state==5){ + $majmin[0] = 1; $majmin[1] = 2; + }elsif ($state==6){ + $majmin[0] = 0; $majmin[1] = 4; + }elsif ($state==7){ + $majmin[0] = 1; $majmin[1] = 3; + }elsif ($state==8){ + $majmin[0] = 2; $majmin[1] = 2; + }elsif ($state==9){ + $majmin[0] = 0; $majmin[1] = 5; + }elsif ($state==10){ + $majmin[0] = 1; $majmin[1] = 4; + }elsif ($state==11){ + $majmin[0] = 2; $majmin[1] = 3; + }elsif ($state==12){ + $majmin[0] = 0; $majmin[1] = 6; + }elsif ($state==13){ + $majmin[0] = 1; $majmin[1] = 5; + }elsif ($state==14){ + $majmin[0] = 2; $majmin[1] = 4; + }elsif ($state==15){ + $majmin[0] = 3; $majmin[1] = 3; + }elsif ($state==16){ + $majmin[0] = 0; $majmin[1] = 7; + }elsif ($state==17){ + $majmin[0] = 1; $majmin[1] = 6; + }elsif ($state==18){ + $majmin[0] = 2; $majmin[1] = 5; + }elsif ($state==19){ + $majmin[0] = 3; $majmin[1] = 4; + }elsif ($state==20){ + $majmin[0] = 0; $majmin[1] = 8; + }elsif ($state==21){ + $majmin[0] = 1; $majmin[1] = 7; + }elsif ($state==22){ + $majmin[0] = 2; $majmin[1] = 6; + }elsif ($state==23){ + $majmin[0] = 3; $majmin[1] = 5; + }elsif ($state==24){ + $majmin[0] = 4; $majmin[1] = 4; + } + #my $retVal = ($majmin[0],$majmin[1]); + return @majmin; +} + + +#Read more: http://wiki.answers.com/Q/How_can_you_calculate_the_average_and_median_in_perl_by_subroutine#ixzz1LcMWdXEE +#sub median{ +# my @a = sort @_; +# return ($a[$#a/2] + $a[@a/2]) / 2; +#} diff -r 000000000000 -r c6e48c6ac606 calc_cnsegements_titan.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/calc_cnsegements_titan.xml Tue Oct 11 14:16:32 2016 -0400 @@ -0,0 +1,42 @@ + + + Calculate Copy Number Segments from TITAN output + + + + + + + + + + + + + + + + + + + + + + + + +