comparison calc_cnsegements_titan.pl @ 0:c6e48c6ac606 draft default tip

planemo upload for repository https://github.com/morinlab/tools-morinlab/tree/master/tools/calc_cnsegments commit 4ef2d91b7c1686a2696b92fe538d4aec51d05e40-dirty
author morinlab
date Tue, 11 Oct 2016 14:16:32 -0400
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:c6e48c6ac606
1 #!/usr/bin/perl -w
2 =head1 NAME
3
4 createTITANsegmentfiles.pl
5
6 -head1 SYNOPSIS
7
8 =head1 OPTIONS
9 -id <string>
10 -outfile|o <string>
11 -symmetric|s <boolean> {1 - true; 0 - false}; Default: 1
12 -outIGV|igv <string> Required:
13 -infile|i <string> Required: TITAN *titan.txt file
14
15 =head1 DESCRIPTION
16
17 =head1 CONTACT
18
19 Gavin Ha <gha@bccrc.ca>
20
21 =cut
22
23 use strict;
24 use List::Util qw[min max];
25 use File::Basename;
26 use Getopt::Long;
27
28
29 my $version = '0.1.3';
30
31 sub usage () {
32 exec('perldoc', $0);
33 exit;
34 }
35
36 my ($id, $outfile, $symmetric, $outIGV, $infile, $calls, $help);
37 $id=0;
38 $calls = 1;
39 $symmetric = 1;
40 GetOptions (
41 'id=s' => \$id,
42 'infile|i=s' => \$infile,
43 'symmetric|s=i' => \$symmetric,
44 'outfile|o=s' => \$outfile,
45 'outIGV|igv=s' => \$outIGV,
46 'help|?' => \$help
47 );
48
49 if($help) {
50 &usage();
51 }
52 print "Parameters:\nid=$id\ninfile=$infile\noutfile=$outfile\noutIGV=$outIGV\n";
53
54 my ($name,$path,$suffix) = fileparse($infile);
55 #my @jnk;
56 #($id,@jnk) = split(/\_/,$name) if ~defined($id);
57 my $nameOut = $id;
58
59 open OUTFILE, ">$outfile" || die("Cannot write to $outfile\n");
60 open IGV, ">$outIGV" || die("Cannot write to $outIGV\n");
61 print OUTFILE "Sample\tChromosome\tStart_Position(bp)\tEnd_Position(bp)\tLength(bp)\tMedian_Ratio\tMedian_logR\tTITAN_state\tTITAN_call\tCopy_Number\tMinorCN\tMajorCN\tClonal_Cluster\tClonal_Frequency\n";
62 print IGV "sample\tchr\tstart\tend\tnum.mark\tmedian\n";
63 open(SEGFILE, $infile) || die("Can't open $infile!\n");
64 my $header = <SEGFILE>;
65 my $line = <SEGFILE>; chomp($line);
66 my($chr, $start, $ref, $nRef, $N, $ratio, $logR, $cn, $state, $call, $clust, $clustFreq, @rest) = split(/\t/,$line);
67 my $end = $start;
68 my @totalRatio=();
69 my @totalLogR=();
70 push(@totalRatio,max($ratio,1-$ratio));
71 push(@totalLogR,$logR);
72 while($line=<SEGFILE>) {
73 chomp($line);
74 my ($chrS, $startS, $refS, $nRefS, $NS, $ratioS, $logRS, $cnS, $stateS, $callS, $clustS, $clustFreqS, @rest) = split(/\t/,$line);
75 if ($chrS ne $chr || $callS ne $call || $clustS ne $clust){
76 my $medianRatio = max($ratio,1-$ratio); #1 position only
77 my $medianLogR = $logR;
78 $medianRatio = median(@totalRatio) if (($end-$start+1)>1); #more than 1 position
79 $medianLogR = median(@totalLogR) if (($end-$start+1)>1);
80 my @majmin;
81 if ($symmetric == 0){
82 @majmin = &getMinorMajorCN($state);
83 }elsif ($symmetric == 1){
84 @majmin = &getMinorMajorCNSymmetric($state);
85 }
86 my $output = $nameOut . "\t" . $chr . "\t" . $start . "\t" . $end . "\t" . ($end-$start+1). "\t" . sprintf("%.2f",$medianRatio) . "\t" . sprintf("%.2f",$medianLogR);
87 $output = $output . "\t" . $state . "\t" . $call if ($calls);
88 $output = $output . "\t" . $cn . "\t" . $majmin[0] . "\t" . $majmin[1] . "\t" . $clust . "\t" . $clustFreq;
89 print OUTFILE $output . "\n";
90 my $output2 = $nameOut . "\t" . $chr . "\t" . $start . "\t" . $end . "\t" . scalar(@totalRatio). "\t" . sprintf("%.2f",$medianLogR);
91 print IGV $output2 . "\n";
92 #reset
93 @totalRatio = ();
94 @totalLogR = ();
95 $start = $startS;
96 }else{
97
98 #push(@totalRatio,max($ratioS,1-$ratioS));
99 #assign current state to previous variables
100 #new end, but start still the same
101 }
102 ($chr, $end, $ref, $nRef, $N, $ratio, $logR, $cn, $state, $call, $clust, $clustFreq) = ($chrS, $startS, $refS, $nRefS, $NS, $ratioS, $logRS, $cnS, $stateS, $callS, $clustS, $clustFreqS);
103 push(@totalRatio,max($ratioS,1-$ratioS));
104 push(@totalLogR,$logRS);
105 }
106
107 #final output
108 my $medianRatio = max($ratio,1-$ratio); #1 position only
109 $medianRatio = median(@totalRatio) if (($end-$start+1)>1); #more than 1 position
110 my $medianLogR = $logR;
111 $medianLogR = median(@totalLogR) if (($end-$start+1)>1);
112 my $output = $nameOut . "\t" . $chr . "\t" . $start . "\t" . $end . "\t" . ($end-$start+1). "\t" . sprintf("%.4f",$medianRatio) . "\t" . sprintf("%.2f",$medianLogR);
113 my @majmin;
114 if ($symmetric == 0){
115 @majmin = &getMinorMajorCN($state);
116 }elsif ($symmetric == 1){
117 @majmin = &getMinorMajorCNSymmetric($state);
118 }
119 $output = $output . "\t" . $state . "\t" . $call if ($calls);
120 $output = $output . "\t" . $cn . "\t" . $majmin[0] . "\t" . $majmin[1] . "\t" . $clust . "\t" . $clustFreq;
121 print OUTFILE $output . "\n";
122 my $output2 = $nameOut . "\t" . $chr . "\t" . $start . "\t" . $end . "\t" . scalar(@totalRatio). "\t" . sprintf("%.2f",$medianLogR);
123 print IGV $output2 . "\n";
124 close OUTFILE;
125 close IGV;
126 #my $returnCode = `sort -k2,2n -k3,3n $outfile\.tmp > $outfile; rm $outfile\.tmp;`;
127 close SEGFILE;
128
129
130 sub median {
131 my (@array_ref) = @_;
132 my $count = scalar @array_ref;
133 # Sort a COPY of the array, leaving the original untouched
134 my @array = sort { $a <=> $b } @array_ref;
135 if ($count % 2) {
136 return $array[int($count/2)];
137 } else {
138 return ($array[$count/2] + $array[$count/2 - 1]) / 2;
139 }
140 }
141
142
143 sub getMinorMajorCN {
144 my ($state) = @_;
145 my @majmin; $majmin[0]="NA"; $majmin[1]="NA";
146 if ($state==0){
147 $majmin[0] = 0; $majmin[1] = 0;
148 }elsif ($state==1 || $state==2){
149 $majmin[0] = 0; $majmin[1] = 1;
150 }elsif($state==4){
151 $majmin[0] = 1; $majmin[1] = 1;
152 }elsif ($state==3 || $state==5){
153 $majmin[0] = 0; $majmin[1] = 2;
154 }elsif ($state==6 || $state==9){
155 $majmin[0] = 0; $majmin[1] = 3;
156 }elsif ($state==10 || $state==14){
157 $majmin[0] = 0; $majmin[1] = 4;
158 }elsif ($state==15 || $state==20){
159 $majmin[0] = 0; $majmin[1] = 5;
160 }elsif ($state==7 || $state==8){
161 $majmin[0] = 1; $majmin[1] = 2;
162 }elsif ($state==11 || $state==13){
163 $majmin[0] = 1; $majmin[1] = 3;
164 }elsif ($state==16 || $state==19){
165 $majmin[0] = 1; $majmin[1] = 4;
166 }elsif ($state==17 || $state==18){
167 $majmin[0] = 2; $majmin[1] = 3;
168 }elsif ($state==12){
169 $majmin[0] = 2; $majmin[1] = 2;
170 }
171 #my $retVal = ($majmin[0],$majmin[1]);
172 return @majmin;
173 }
174
175 sub getMinorMajorCNSymmetric {
176 my ($state) = @_;
177 my @majmin; $majmin[0]="NA"; $majmin[1]="NA";
178 if ($state==0){
179 $majmin[0] = 0; $majmin[1] = 0;
180 }elsif ($state==1){
181 $majmin[0] = 0; $majmin[1] = 1;
182 }elsif($state==2){
183 $majmin[0] = 0; $majmin[1] = 2;
184 }elsif ($state==3){
185 $majmin[0] = 1; $majmin[1] = 1;
186 }elsif ($state==4){
187 $majmin[0] = 0; $majmin[1] = 3;
188 }elsif ($state==5){
189 $majmin[0] = 1; $majmin[1] = 2;
190 }elsif ($state==6){
191 $majmin[0] = 0; $majmin[1] = 4;
192 }elsif ($state==7){
193 $majmin[0] = 1; $majmin[1] = 3;
194 }elsif ($state==8){
195 $majmin[0] = 2; $majmin[1] = 2;
196 }elsif ($state==9){
197 $majmin[0] = 0; $majmin[1] = 5;
198 }elsif ($state==10){
199 $majmin[0] = 1; $majmin[1] = 4;
200 }elsif ($state==11){
201 $majmin[0] = 2; $majmin[1] = 3;
202 }elsif ($state==12){
203 $majmin[0] = 0; $majmin[1] = 6;
204 }elsif ($state==13){
205 $majmin[0] = 1; $majmin[1] = 5;
206 }elsif ($state==14){
207 $majmin[0] = 2; $majmin[1] = 4;
208 }elsif ($state==15){
209 $majmin[0] = 3; $majmin[1] = 3;
210 }elsif ($state==16){
211 $majmin[0] = 0; $majmin[1] = 7;
212 }elsif ($state==17){
213 $majmin[0] = 1; $majmin[1] = 6;
214 }elsif ($state==18){
215 $majmin[0] = 2; $majmin[1] = 5;
216 }elsif ($state==19){
217 $majmin[0] = 3; $majmin[1] = 4;
218 }elsif ($state==20){
219 $majmin[0] = 0; $majmin[1] = 8;
220 }elsif ($state==21){
221 $majmin[0] = 1; $majmin[1] = 7;
222 }elsif ($state==22){
223 $majmin[0] = 2; $majmin[1] = 6;
224 }elsif ($state==23){
225 $majmin[0] = 3; $majmin[1] = 5;
226 }elsif ($state==24){
227 $majmin[0] = 4; $majmin[1] = 4;
228 }
229 #my $retVal = ($majmin[0],$majmin[1]);
230 return @majmin;
231 }
232
233
234 #Read more: http://wiki.answers.com/Q/How_can_you_calculate_the_average_and_median_in_perl_by_subroutine#ixzz1LcMWdXEE
235 #sub median{
236 # my @a = sort @_;
237 # return ($a[$#a/2] + $a[@a/2]) / 2;
238 #}