Mercurial > repos > morinlab > calc_cnsegments
comparison calc_cnsegements_titan.pl @ 0:c6e48c6ac606 draft default tip
planemo upload for repository https://github.com/morinlab/tools-morinlab/tree/master/tools/calc_cnsegments commit 4ef2d91b7c1686a2696b92fe538d4aec51d05e40-dirty
author | morinlab |
---|---|
date | Tue, 11 Oct 2016 14:16:32 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:c6e48c6ac606 |
---|---|
1 #!/usr/bin/perl -w | |
2 =head1 NAME | |
3 | |
4 createTITANsegmentfiles.pl | |
5 | |
6 -head1 SYNOPSIS | |
7 | |
8 =head1 OPTIONS | |
9 -id <string> | |
10 -outfile|o <string> | |
11 -symmetric|s <boolean> {1 - true; 0 - false}; Default: 1 | |
12 -outIGV|igv <string> Required: | |
13 -infile|i <string> Required: TITAN *titan.txt file | |
14 | |
15 =head1 DESCRIPTION | |
16 | |
17 =head1 CONTACT | |
18 | |
19 Gavin Ha <gha@bccrc.ca> | |
20 | |
21 =cut | |
22 | |
23 use strict; | |
24 use List::Util qw[min max]; | |
25 use File::Basename; | |
26 use Getopt::Long; | |
27 | |
28 | |
29 my $version = '0.1.3'; | |
30 | |
31 sub usage () { | |
32 exec('perldoc', $0); | |
33 exit; | |
34 } | |
35 | |
36 my ($id, $outfile, $symmetric, $outIGV, $infile, $calls, $help); | |
37 $id=0; | |
38 $calls = 1; | |
39 $symmetric = 1; | |
40 GetOptions ( | |
41 'id=s' => \$id, | |
42 'infile|i=s' => \$infile, | |
43 'symmetric|s=i' => \$symmetric, | |
44 'outfile|o=s' => \$outfile, | |
45 'outIGV|igv=s' => \$outIGV, | |
46 'help|?' => \$help | |
47 ); | |
48 | |
49 if($help) { | |
50 &usage(); | |
51 } | |
52 print "Parameters:\nid=$id\ninfile=$infile\noutfile=$outfile\noutIGV=$outIGV\n"; | |
53 | |
54 my ($name,$path,$suffix) = fileparse($infile); | |
55 #my @jnk; | |
56 #($id,@jnk) = split(/\_/,$name) if ~defined($id); | |
57 my $nameOut = $id; | |
58 | |
59 open OUTFILE, ">$outfile" || die("Cannot write to $outfile\n"); | |
60 open IGV, ">$outIGV" || die("Cannot write to $outIGV\n"); | |
61 print OUTFILE "Sample\tChromosome\tStart_Position(bp)\tEnd_Position(bp)\tLength(bp)\tMedian_Ratio\tMedian_logR\tTITAN_state\tTITAN_call\tCopy_Number\tMinorCN\tMajorCN\tClonal_Cluster\tClonal_Frequency\n"; | |
62 print IGV "sample\tchr\tstart\tend\tnum.mark\tmedian\n"; | |
63 open(SEGFILE, $infile) || die("Can't open $infile!\n"); | |
64 my $header = <SEGFILE>; | |
65 my $line = <SEGFILE>; chomp($line); | |
66 my($chr, $start, $ref, $nRef, $N, $ratio, $logR, $cn, $state, $call, $clust, $clustFreq, @rest) = split(/\t/,$line); | |
67 my $end = $start; | |
68 my @totalRatio=(); | |
69 my @totalLogR=(); | |
70 push(@totalRatio,max($ratio,1-$ratio)); | |
71 push(@totalLogR,$logR); | |
72 while($line=<SEGFILE>) { | |
73 chomp($line); | |
74 my ($chrS, $startS, $refS, $nRefS, $NS, $ratioS, $logRS, $cnS, $stateS, $callS, $clustS, $clustFreqS, @rest) = split(/\t/,$line); | |
75 if ($chrS ne $chr || $callS ne $call || $clustS ne $clust){ | |
76 my $medianRatio = max($ratio,1-$ratio); #1 position only | |
77 my $medianLogR = $logR; | |
78 $medianRatio = median(@totalRatio) if (($end-$start+1)>1); #more than 1 position | |
79 $medianLogR = median(@totalLogR) if (($end-$start+1)>1); | |
80 my @majmin; | |
81 if ($symmetric == 0){ | |
82 @majmin = &getMinorMajorCN($state); | |
83 }elsif ($symmetric == 1){ | |
84 @majmin = &getMinorMajorCNSymmetric($state); | |
85 } | |
86 my $output = $nameOut . "\t" . $chr . "\t" . $start . "\t" . $end . "\t" . ($end-$start+1). "\t" . sprintf("%.2f",$medianRatio) . "\t" . sprintf("%.2f",$medianLogR); | |
87 $output = $output . "\t" . $state . "\t" . $call if ($calls); | |
88 $output = $output . "\t" . $cn . "\t" . $majmin[0] . "\t" . $majmin[1] . "\t" . $clust . "\t" . $clustFreq; | |
89 print OUTFILE $output . "\n"; | |
90 my $output2 = $nameOut . "\t" . $chr . "\t" . $start . "\t" . $end . "\t" . scalar(@totalRatio). "\t" . sprintf("%.2f",$medianLogR); | |
91 print IGV $output2 . "\n"; | |
92 #reset | |
93 @totalRatio = (); | |
94 @totalLogR = (); | |
95 $start = $startS; | |
96 }else{ | |
97 | |
98 #push(@totalRatio,max($ratioS,1-$ratioS)); | |
99 #assign current state to previous variables | |
100 #new end, but start still the same | |
101 } | |
102 ($chr, $end, $ref, $nRef, $N, $ratio, $logR, $cn, $state, $call, $clust, $clustFreq) = ($chrS, $startS, $refS, $nRefS, $NS, $ratioS, $logRS, $cnS, $stateS, $callS, $clustS, $clustFreqS); | |
103 push(@totalRatio,max($ratioS,1-$ratioS)); | |
104 push(@totalLogR,$logRS); | |
105 } | |
106 | |
107 #final output | |
108 my $medianRatio = max($ratio,1-$ratio); #1 position only | |
109 $medianRatio = median(@totalRatio) if (($end-$start+1)>1); #more than 1 position | |
110 my $medianLogR = $logR; | |
111 $medianLogR = median(@totalLogR) if (($end-$start+1)>1); | |
112 my $output = $nameOut . "\t" . $chr . "\t" . $start . "\t" . $end . "\t" . ($end-$start+1). "\t" . sprintf("%.4f",$medianRatio) . "\t" . sprintf("%.2f",$medianLogR); | |
113 my @majmin; | |
114 if ($symmetric == 0){ | |
115 @majmin = &getMinorMajorCN($state); | |
116 }elsif ($symmetric == 1){ | |
117 @majmin = &getMinorMajorCNSymmetric($state); | |
118 } | |
119 $output = $output . "\t" . $state . "\t" . $call if ($calls); | |
120 $output = $output . "\t" . $cn . "\t" . $majmin[0] . "\t" . $majmin[1] . "\t" . $clust . "\t" . $clustFreq; | |
121 print OUTFILE $output . "\n"; | |
122 my $output2 = $nameOut . "\t" . $chr . "\t" . $start . "\t" . $end . "\t" . scalar(@totalRatio). "\t" . sprintf("%.2f",$medianLogR); | |
123 print IGV $output2 . "\n"; | |
124 close OUTFILE; | |
125 close IGV; | |
126 #my $returnCode = `sort -k2,2n -k3,3n $outfile\.tmp > $outfile; rm $outfile\.tmp;`; | |
127 close SEGFILE; | |
128 | |
129 | |
130 sub median { | |
131 my (@array_ref) = @_; | |
132 my $count = scalar @array_ref; | |
133 # Sort a COPY of the array, leaving the original untouched | |
134 my @array = sort { $a <=> $b } @array_ref; | |
135 if ($count % 2) { | |
136 return $array[int($count/2)]; | |
137 } else { | |
138 return ($array[$count/2] + $array[$count/2 - 1]) / 2; | |
139 } | |
140 } | |
141 | |
142 | |
143 sub getMinorMajorCN { | |
144 my ($state) = @_; | |
145 my @majmin; $majmin[0]="NA"; $majmin[1]="NA"; | |
146 if ($state==0){ | |
147 $majmin[0] = 0; $majmin[1] = 0; | |
148 }elsif ($state==1 || $state==2){ | |
149 $majmin[0] = 0; $majmin[1] = 1; | |
150 }elsif($state==4){ | |
151 $majmin[0] = 1; $majmin[1] = 1; | |
152 }elsif ($state==3 || $state==5){ | |
153 $majmin[0] = 0; $majmin[1] = 2; | |
154 }elsif ($state==6 || $state==9){ | |
155 $majmin[0] = 0; $majmin[1] = 3; | |
156 }elsif ($state==10 || $state==14){ | |
157 $majmin[0] = 0; $majmin[1] = 4; | |
158 }elsif ($state==15 || $state==20){ | |
159 $majmin[0] = 0; $majmin[1] = 5; | |
160 }elsif ($state==7 || $state==8){ | |
161 $majmin[0] = 1; $majmin[1] = 2; | |
162 }elsif ($state==11 || $state==13){ | |
163 $majmin[0] = 1; $majmin[1] = 3; | |
164 }elsif ($state==16 || $state==19){ | |
165 $majmin[0] = 1; $majmin[1] = 4; | |
166 }elsif ($state==17 || $state==18){ | |
167 $majmin[0] = 2; $majmin[1] = 3; | |
168 }elsif ($state==12){ | |
169 $majmin[0] = 2; $majmin[1] = 2; | |
170 } | |
171 #my $retVal = ($majmin[0],$majmin[1]); | |
172 return @majmin; | |
173 } | |
174 | |
175 sub getMinorMajorCNSymmetric { | |
176 my ($state) = @_; | |
177 my @majmin; $majmin[0]="NA"; $majmin[1]="NA"; | |
178 if ($state==0){ | |
179 $majmin[0] = 0; $majmin[1] = 0; | |
180 }elsif ($state==1){ | |
181 $majmin[0] = 0; $majmin[1] = 1; | |
182 }elsif($state==2){ | |
183 $majmin[0] = 0; $majmin[1] = 2; | |
184 }elsif ($state==3){ | |
185 $majmin[0] = 1; $majmin[1] = 1; | |
186 }elsif ($state==4){ | |
187 $majmin[0] = 0; $majmin[1] = 3; | |
188 }elsif ($state==5){ | |
189 $majmin[0] = 1; $majmin[1] = 2; | |
190 }elsif ($state==6){ | |
191 $majmin[0] = 0; $majmin[1] = 4; | |
192 }elsif ($state==7){ | |
193 $majmin[0] = 1; $majmin[1] = 3; | |
194 }elsif ($state==8){ | |
195 $majmin[0] = 2; $majmin[1] = 2; | |
196 }elsif ($state==9){ | |
197 $majmin[0] = 0; $majmin[1] = 5; | |
198 }elsif ($state==10){ | |
199 $majmin[0] = 1; $majmin[1] = 4; | |
200 }elsif ($state==11){ | |
201 $majmin[0] = 2; $majmin[1] = 3; | |
202 }elsif ($state==12){ | |
203 $majmin[0] = 0; $majmin[1] = 6; | |
204 }elsif ($state==13){ | |
205 $majmin[0] = 1; $majmin[1] = 5; | |
206 }elsif ($state==14){ | |
207 $majmin[0] = 2; $majmin[1] = 4; | |
208 }elsif ($state==15){ | |
209 $majmin[0] = 3; $majmin[1] = 3; | |
210 }elsif ($state==16){ | |
211 $majmin[0] = 0; $majmin[1] = 7; | |
212 }elsif ($state==17){ | |
213 $majmin[0] = 1; $majmin[1] = 6; | |
214 }elsif ($state==18){ | |
215 $majmin[0] = 2; $majmin[1] = 5; | |
216 }elsif ($state==19){ | |
217 $majmin[0] = 3; $majmin[1] = 4; | |
218 }elsif ($state==20){ | |
219 $majmin[0] = 0; $majmin[1] = 8; | |
220 }elsif ($state==21){ | |
221 $majmin[0] = 1; $majmin[1] = 7; | |
222 }elsif ($state==22){ | |
223 $majmin[0] = 2; $majmin[1] = 6; | |
224 }elsif ($state==23){ | |
225 $majmin[0] = 3; $majmin[1] = 5; | |
226 }elsif ($state==24){ | |
227 $majmin[0] = 4; $majmin[1] = 4; | |
228 } | |
229 #my $retVal = ($majmin[0],$majmin[1]); | |
230 return @majmin; | |
231 } | |
232 | |
233 | |
234 #Read more: http://wiki.answers.com/Q/How_can_you_calculate_the_average_and_median_in_perl_by_subroutine#ixzz1LcMWdXEE | |
235 #sub median{ | |
236 # my @a = sort @_; | |
237 # return ($a[$#a/2] + $a[@a/2]) / 2; | |
238 #} |