annotate methylation_extractor @ 17:59a61c1d5aee draft default tip

Uploaded
author eugen
date Thu, 16 Aug 2012 08:11:06 -0400
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
17
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
1 #!/usr/bin/perl
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
2 use Getopt::Long;
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
3 use Cwd;
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
4
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
5 my @filenames;
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
6 my %counting;
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
7
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
8 my %fhs;
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
9 my %bases;
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
10 my ($single,$paired,$output_file) = process_commandline();
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
11 my $parent_dir = getcwd;
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
12
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
13 process_Bismark_results_file();
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
14
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
15 sub process_commandline{
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
16 my $help;
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
17 my $single_end;
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
18 my $paired_end;
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
19 my $ignore;
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
20 my $genomic_fasta;
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
21 my $full;
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
22 my $report;
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
23 my $extractor_version;
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
24 my $no_overlap;
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
25 my $merge_non_CpG;
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
26 my $vanilla;
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
27 my $output_file;
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
28
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
29 my $command_line = GetOptions ('p|paired-end' => \$paired_end,
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
30 's|single-end' => \$single_end,
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
31 'o|output=s' => \$output_file
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
32 );
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
33
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
34 ### EXIT ON ERROR if there were errors with any of the supplied options
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
35 unless ($command_line){
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
36 die "Please respecify command line options\n";
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
37 }
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
38
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
39
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
40 ### no files provided
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
41 unless (@ARGV){
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
42 die "You need to provide one or more Bismark files to create an individual C methylation output. Please respecify!\n";
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
43 }
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
44 @filenames = @ARGV;
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
45
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
46 if ($single_end){
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
47 $paired_end = 0; ### SINGLE END ALIGNMENTS
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
48 }
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
49 elsif ($paired_end){
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
50 $single_end = 0; ### PAIRED-END ALIGNMENTS
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
51 }
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
52 else{
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
53 die "Please specify whether the supplied file(s) are in Bismark single-end or paired-end format\n\n";
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
54 }
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
55
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
56 return ($single_end,$paired_end, $output_file);
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
57 }
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
58
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
59 sub process_Bismark_results_file{
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
60 chdir $parent_dir or die "Unable to move to initial working directory $!\n";
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
61 foreach my $filename (@filenames){
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
62 %fhs = ();
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
63 %counting =(
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
64 total_meCHG_count => 0,
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
65 total_meCHH_count => 0,
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
66 total_meCpG_count => 0,
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
67 total_unmethylated_CHG_count => 0,
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
68 total_unmethylated_CHH_count => 0,
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
69 total_unmethylated_CpG_count => 0,
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
70 sequences_count => 0,
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
71 );
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
72 warn "\nNow reading in Bismark result file $filename\n\n";
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
73 open (IN,$filename) or die "Can't open file $filename: $!\n";
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
74
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
75 if($output_file){
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
76 $full_output=$output_file;
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
77 }
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
78 else{
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
79 #print "lbubb";
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
80 my($dir, $output_filename, $extension) = $filename =~ m/(.*\/)(.*)(\..*)$/;
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
81 my $cpg_output = my $other_c_output = my $full_output = $output_filename;
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
82 $full_output =~ s/^/Full_/;
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
83 $full_output = $dir."/".$full_output.".bed";
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
84 }
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
85
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
86 ### OPENING OUTPUT-FILEHANDLES
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
87 open ($fhs{full_output},'>',$full_output) or die "Failed to write to $full_output $!\n";
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
88
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
89 ### Write header
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
90 printf {$fhs{full_output}} ("chr\tpos\tstrand\tcontext\tcoverage\tmethylated\tpercentage methylated\n");
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
91
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
92 my $methylation_call_strings_processed = 0;
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
93 my $line_count = 0;
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
94
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
95
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
96 ### PROCESSING SINGLE-END RESULT FILES
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
97 if ($single){
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
98
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
99 while (<IN>){
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
100 ### SAM format can either start with header lines (starting with @) or start with alignments directly
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
101 if (/^\@/){ # skipping header lines (starting with @)
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
102 warn "skipping SAM header line:\t$_";
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
103 next;
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
104 }
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
105
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
106 ++$line_count;
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
107 warn "Processed lines: $line_count\n" if ($line_count%500000==0);
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
108
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
109 my ($id,$chrom,$start,$meth_call,$read_conversion,$genome_conversion) = (split("\t"))[0,2,3,13,14,15];
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
110
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
111 $chrom = "chrC" if ("\U$chrom" =~ /CHLOR.*/);
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
112 $chrom = "chrM" if ("\U$chrom" =~ /MITOCH.*/);
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
113
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
114 if ("\U$chrom" !~ /CHR.*/){
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
115 $chrom = "chr".$chrom;
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
116 }
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
117
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
118 if (!defined $last_chrom){
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
119 print "Now parsing data from $chrom\n";
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
120 }
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
121
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
122 if (defined($last_chrom) and $last_chrom ne $chrom){
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
123 print "Parsed: $last_chrom\n";
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
124 write_hash($last_chrom);
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
125 print "Now parsing data from $chrom\n";
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
126 }
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
127
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
128 $meth_call =~ s/^XM:Z://;
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
129 $read_conversion =~ s/^XR:Z://;
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
130 $genome_conversion =~ s/^XG:Z://;
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
131 my $strand;
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
132 chomp $genome_conversion;
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
133
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
134
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
135 my $index;
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
136 if ($meth_call){
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
137 if ($read_conversion eq 'CT' and $genome_conversion eq 'CT'){ ## original top strand
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
138 $index = 0;
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
139 $strand = '+';
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
140 }
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
141 elsif ($read_conversion eq 'GA' and $genome_conversion eq 'CT'){ ## complementary to original top strand
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
142 $index = 1;
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
143 $strand = '-';
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
144 }
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
145 elsif ($read_conversion eq 'GA' and $genome_conversion eq 'GA'){ ## complementary to original bottom strand
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
146 $index = 2;
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
147 $strand = '+';
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
148 }
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
149 elsif ($read_conversion eq 'CT' and $genome_conversion eq 'GA'){ ## original bottom strand
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
150 $index = 3;
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
151 $strand = '-';
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
152 }
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
153 else {
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
154 die "Unexpected combination of read and genome conversion: $read_conversion / $genome_conversion\n";
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
155 }
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
156
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
157 ### If the read is in SAM format we need to reverse the methylation call if the read has been reverse-complemented for the output
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
158 if ($strand eq '-'){
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
159 $meth_call = reverse $meth_call;
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
160 }
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
161 }
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
162
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
163
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
164 ### printing out the methylation state of every C in the read
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
165 hash_individual_C_methylation_states_single_end($meth_call,$chrom,$start,$id,$strand,$index);
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
166
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
167 ++$methylation_call_strings_processed; # 1 per single-end result
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
168
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
169 $last_chrom = $chrom;
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
170 }
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
171 write_hash($last_chrom);
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
172 print "\n\nDone!";
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
173 }
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
174
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
175 ### PROCESSING PAIRED-END RESULT FILES
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
176 elsif ($paired){
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
177 while (<IN>){
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
178 ### SAM format can either start with header lines (starting with @) or start with alignments directly
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
179 if (/^\@/){ # skipping header lines (starting with @)
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
180 warn "skipping SAM header line:\t$_";
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
181 next;
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
182 }
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
183
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
184 ++$line_count;
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
185 # example paired-end reads in SAM format (2 consecutive lines)
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
186 # 1_R1/1 67 5 103172224 255 40M = 103172417 233 AATATTTTTTTTATTTTAAAATGTGTATTGATTTAAATTT IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII NM:i:4 XX:Z:4T1T24TT7 XM:Z:....h.h........................hh....... XR:Z:CT XG:Z:CT
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
187 # 1_R1/2 131 5 103172417 255 40M = 103172224 -233 TATTTTTTTTTAGAGTATTTTTTAATGGTTATTAGATTTT IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII NM:i:6 XX:Z:T5T1T9T9T7T3 XM:Z:h.....h.h.........h.........h.......h... XR:Z:GA XG:Z:CT
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
188
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
189
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
190 my ($id_1,$chrom,$start_read_1,$meth_call_1,$first_read_conversion,$genome_conversion) = (split("\t"))[0,2,3,13,14,15];
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
191 $_ = <IN>; # reading in the paired read
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
192 my ($id_2,$start_read_2,$meth_call_2,$second_read_conversion) = (split("\t"))[0,3,13,14];
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
193
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
194
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
195 $chrom = "chrC" if ("\U$chrom" =~ /CHLOR.*/);
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
196 $chrom = "chrM" if ("\U$chrom" =~ /MITOCH.*/);
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
197 if ("\U$chrom" !~ /CHR.*/){
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
198 $chrom = "chr".$chrom;
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
199 }
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
200 if (!defined $last_chrom){
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
201 print "Now parsing data from $chrom\n";
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
202 }
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
203 if (defined($last_chrom) and $last_chrom ne $chrom){
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
204 print "Parsed: $last_chrom\n";
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
205 write_hash($last_chrom);
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
206 print "Now parsing data from $chrom\n";
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
207 }
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
208 $meth_call_1 =~ s/^XM:Z://;
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
209 $meth_call_2 =~ s/^XM:Z://;
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
210 $first_read_conversion =~ s/^XR:Z://;
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
211 $second_read_conversion =~ s/^XR:Z://;
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
212 $genome_conversion =~ s/^XG:Z://;
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
213 chomp $genome_conversion;
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
214
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
215
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
216 my $index;
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
217 my $strand;
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
218
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
219 if ($first_read_conversion eq 'CT' and $genome_conversion eq 'CT'){
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
220 $index = 0; ## this is OT
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
221 $strand = '+';
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
222 }
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
223 elsif ($first_read_conversion eq 'GA' and $genome_conversion eq 'CT'){
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
224 $index = 1; ## this is CTOT
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
225 $strand = '-';
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
226 }
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
227 elsif ($first_read_conversion eq 'GA' and $genome_conversion eq 'GA'){
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
228 $index = 2; ## this is CTOB
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
229 $strand = '+';
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
230 }
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
231 elsif ($first_read_conversion eq 'CT' and $genome_conversion eq 'GA'){
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
232 $index = 3; ## this is OB
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
233 $strand = '-';
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
234 }
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
235 else {
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
236 die "Unexpected combination of read and genome conversion: $first_read_conversion / $genome_conversion\n";
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
237 }
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
238
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
239 ### reversing the methylation call of the read that was reverse-complemented
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
240 if ($strand eq '+'){
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
241 # $meth_call_2 = reverse $meth_call_2;
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
242 }
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
243 else{
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
244 #$meth_call_1 = reverse $meth_call_1;
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
245 }
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
246
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
247 if ($meth_call_1 and $meth_call_2){
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
248 ### Clipping off the first <int> number of bases from the methylation call strings as specified with '--ignore <int>'
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
249 my $end_read_1;
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
250
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
251 if ($strand eq '+'){
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
252 $end_read_1 = $start_read_1+length($meth_call_1)-1;
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
253 #$start_read_2 += length($meth_call_2)-1; ## $meth_call_2 is already shortened! Passing on the start position on the reverse strand
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
254 }
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
255 else{
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
256 $end_read_1 = $start_read_1+length($meth_call_1)-1;
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
257 #$end_read_1 = $start_read_1;
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
258 #$start_read_1 += length($meth_call_1)-1; ## $meth_call_1 is already shortened! Passing on the start position on the reverse strand
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
259 }
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
260
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
261 if ($strand eq '+'){
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
262 ## we first pass the first read which is in + orientation on the forward strand
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
263 hash_individual_C_methylation_states_paired_end_files($meth_call_1,$chrom,$start_read_1,$id_1,'+',$index,0);
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
264
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
265 # we next pass the second read which is in - orientation on the reverse strand
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
266 ### if --no_overlap was specified we also pass the end of read 1. If read 2 starts to overlap with read 1 we can stop extracting methylation calls from read 2
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
267 hash_individual_C_methylation_states_paired_end_files($meth_call_2,$chrom,$start_read_2,$id_2,'+',$index,$end_read_1);
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
268 }
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
269
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
270 else{
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
271 ## we first pass the first read which is in - orientation on the reverse strand
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
272 hash_individual_C_methylation_states_paired_end_files($meth_call_1,$chrom,$start_read_1,$id_1,'-',$index,0);
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
273
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
274 # we next pass the second read which is in + orientation on the forward strand
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
275 ### if --no_overlap was specified we also pass the end of read 1. If read 2 starts to overlap with read 1 we will stop extracting methylation calls from read 2
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
276 hash_individual_C_methylation_states_paired_end_files($meth_call_2,$chrom,$start_read_2,$id_2,'-',$index,$end_read_1);
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
277 }
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
278
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
279 $methylation_call_strings_processed += 2; # paired-end = 2 methylation calls
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
280 }
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
281 $last_chrom = $chrom;
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
282 }
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
283 write_hash($last_chrom);
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
284 print "\n\nDone!";
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
285 }
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
286 else{
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
287 die "Single-end or paired-end reads not specified properly\n";
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
288 }
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
289
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
290 print "\n\nProcessed $line_count lines from $filename in total\n";
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
291 print "Total number of methylation call strings processed: $methylation_call_strings_processed\n\n";
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
292 }
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
293 }
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
294
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
295
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
296 sub hash_individual_C_methylation_states_paired_end_files{
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
297
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
298 my ($meth_call,$chrom,$start,$id,$strand,$filehandle_index,$end_read_1) = @_;
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
299 my @methylation_calls = split(//,$meth_call);
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
300
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
301 #################################################################
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
302 ### . for bases not involving cytosines ###
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
303 ### X for methylated C in CHG context (was protected) ###
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
304 ### x for not methylated C in CHG context (was converted) ###
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
305 ### H for methylated C in CHH context (was protected) ###
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
306 ### h for not methylated C in CHH context (was converted) ###
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
307 ### Z for methylated C in CpG context (was protected) ###
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
308 ### z for not methylated C in CpG context (was converted) ###
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
309 #################################################################
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
310
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
311 #my $methyl_CHG_count = 0;
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
312 #my $methyl_CHH_count = 0;
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
313 #my $methyl_CpG_count = 0;
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
314 #my $unmethylated_CHG_count = 0;
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
315 #my $unmethylated_CHH_count = 0;
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
316 #my $unmethylated_CpG_count = 0;
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
317
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
318 ### THIS IS AN OPTIONAL 2-CONTEXT (CpG and $methylation_calls[$indexnon-CpG) SECTION IF --merge_non_CpG was specified
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
319
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
320 if ($strand eq '+') {
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
321 for my $index (0..$#methylation_calls) {
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
322
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
323 ### Returning as soon as the methylation calls start overlapping
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
324 #if ($start+$index >= $end_read_1) {
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
325 # return;
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
326 #}
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
327
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
328 if ($methylation_calls[$index] eq 'X') {
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
329 #$counting{total_meCHG_count}++;
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
330 if (defined $bases{$start+$index}{"+"}{$methylation_calls[$index]}){
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
331 $bases{$start+$index}{"+"}{$methylation_calls[$index]}++;
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
332 }else{
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
333 $bases{$start+$index}{"+"}{$methylation_calls[$index]}=1;
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
334 }
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
335 } elsif ($methylation_calls[$index] eq 'x') {
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
336 #$counting{total_unmethylated_CHG_count}++;
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
337 if (defined $bases{$start+$index}{"+"}{$methylation_calls[$index]}){
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
338 $bases{$start+$index}{"+"}{$methylation_calls[$index]}++;
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
339 }else{
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
340 $bases{$start+$index}{"+"}{$methylation_calls[$index]}=1;
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
341 }
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
342 } elsif ($methylation_calls[$index] eq 'Z') {
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
343 #$counting{total_meCpG_count}++;
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
344 if (defined $bases{$start+$index}{"+"}{$methylation_calls[$index]}){
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
345 $bases{$start+$index}{"+"}{$methylation_calls[$index]}++;
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
346 }else{
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
347 $bases{$start+$index}{"+"}{$methylation_calls[$index]}=1;
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
348 }
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
349 } elsif ($methylation_calls[$index] eq 'z') {
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
350 #$counting{total_unmethylated_CpG_count}++;
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
351 if (defined $bases{$start+$index}{"+"}{$methylation_calls[$index]}){
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
352 $bases{$start+$index}{"+"}{$methylation_calls[$index]}++;
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
353 }else{
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
354 $bases{$start+$index}{"+"}{$methylation_calls[$index]}=1;
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
355 }
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
356 } elsif ($methylation_calls[$index] eq 'H') {
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
357 #$counting{total_meCHH_count}++;
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
358 if (defined $bases{$start+$index}{"+"}{$methylation_calls[$index]}){
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
359 $bases{$start+$index}{"+"}{$methylation_calls[$index]}++;
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
360 }else{
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
361 $bases{$start+$index}{"+"}{$methylation_calls[$index]}=1;
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
362 }
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
363 } elsif ($methylation_calls[$index] eq 'h') {
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
364 #$counting{total_unmethylated_CHH_count}++;
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
365 if (defined $bases{$start+$index}{"+"}{$methylation_calls[$index]}){
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
366 $bases{$start+$index}{"+"}{$methylation_calls[$index]}++;
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
367 }else{
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
368 $bases{$start+$index}{"+"}{$methylation_calls[$index]}=1;
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
369 }
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
370 }
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
371 elsif ($methylation_calls[$index] eq '.'){}
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
372 else{
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
373 die "The methylation call string contained the following unrecognised character: $methylation_calls[$index]\n";
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
374 }
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
375 }
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
376 } elsif ($strand eq '-') {
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
377 for my $index (0..$#methylation_calls) {
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
378
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
379 ### Returning as soon as the methylation calls start overlapping
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
380 #if ($start-$index <= $end_read_1) {
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
381 # return;
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
382 #}
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
383 =head1
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
384 if ($start-$index == 100){
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
385 print "\nStrand: - ID: $id\n";
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
386 }
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
387
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
388 if ($methylation_calls[$index] eq 'X') {
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
389 #$counting{total_meCHG_count}++;
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
390 if (defined $bases{$start-$index}{"-"}{$methylation_calls[$index]}){
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
391 $bases{$start-$index}{"-"}{$methylation_calls[$index]}++;
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
392 }else{
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
393 $bases{$start-$index}{"-"}{$methylation_calls[$index]}=1;
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
394 }
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
395 } elsif ($methylation_calls[$index] eq 'x') {
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
396 #$counting{total_unmethylated_CHG_count}++;
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
397 if (defined $bases{$start-$index}{"-"}{$methylation_calls[$index]}){
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
398 $bases{$start-$index}{"-"}{$methylation_calls[$index]}++;
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
399 }else{
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
400 $bases{$start-$index}{"-"}{$methylation_calls[$index]}=1;
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
401 }
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
402 } elsif ($methylation_calls[$index] eq 'Z') {
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
403 #$counting{total_meCpG_count}++;
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
404 if (defined $bases{$start-$index}{"-"}{$methylation_calls[$index]}){
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
405 $bases{$start-$index}{"-"}{$methylation_calls[$index]}++;
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
406 }else{
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
407 $bases{$start-$index}{"-"}{$methylation_calls[$index]}=1;
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
408 }
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
409 } elsif ($methylation_calls[$index] eq 'z') {
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
410 #$counting{total_unmethylated_CpG_count}++;
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
411 if (defined $bases{$start-$index}{"-"}{$methylation_calls[$index]}){
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
412 $bases{$start-$index}{"-"}{$methylation_calls[$index]}++;
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
413 }else{
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
414 $bases{$start-$index}{"-"}{$methylation_calls[$index]}=1;
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
415 }
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
416 } elsif ($methylation_calls[$index] eq 'H') {
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
417 # $counting{total_meCHH_count}++;
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
418 if (defined $bases{$start-$index}{"-"}{$methylation_calls[$index]}){
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
419 $bases{$start-$index}{"-"}{$methylation_calls[$index]}++;
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
420 }else{
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
421 $bases{$start-$index}{"-"}{$methylation_calls[$index]}=1;
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
422 }
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
423 } elsif ($methylation_calls[$index] eq 'h') {
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
424 #$counting{total_unmethylated_CHH_count}++;
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
425 if (defined $bases{$start-$index}{"-"}{$methylation_calls[$index]}){
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
426 $bases{$start-$index}{"-"}{$methylation_calls[$index]}++;
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
427 }else{
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
428 $bases{$start-$index}{"-"}{$methylation_calls[$index]}=1;
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
429 }
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
430 }
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
431 elsif ($methylation_calls[$index] eq '.') {}
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
432 else{
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
433 die "The methylation call string contained the following unrecognised character: $methylation_calls[$index]\n";
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
434 }
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
435 }
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
436 =cut
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
437 if ($methylation_calls[$index] eq 'X') {
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
438 #$counting{total_meCHG_count}++;
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
439 if (defined $bases{$start+$index}{"-"}{$methylation_calls[$index]}){
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
440 $bases{$start+$index}{"-"}{$methylation_calls[$index]}++;
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
441 }else{
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
442 $bases{$start+$index}{"-"}{$methylation_calls[$index]}=1;
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
443 }
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
444 } elsif ($methylation_calls[$index] eq 'x') {
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
445 #$counting{total_unmethylated_CHG_count}++;
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
446 if (defined $bases{$start+$index}{"-"}{$methylation_calls[$index]}){
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
447 $bases{$start+$index}{"-"}{$methylation_calls[$index]}++;
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
448 }else{
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
449 $bases{$start+$index}{"-"}{$methylation_calls[$index]}=1;
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
450 }
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
451 } elsif ($methylation_calls[$index] eq 'Z') {
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
452 #$counting{total_meCpG_count}++;
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
453 if (defined $bases{$start+$index}{"-"}{$methylation_calls[$index]}){
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
454 $bases{$start+$index}{"-"}{$methylation_calls[$index]}++;
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
455 }else{
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
456 $bases{$start+$index}{"-"}{$methylation_calls[$index]}=1;
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
457 }
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
458 } elsif ($methylation_calls[$index] eq 'z') {
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
459 #$counting{total_unmethylated_CpG_count}++;
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
460 if (defined $bases{$start+$index}{"-"}{$methylation_calls[$index]}){
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
461 $bases{$start+$index}{"-"}{$methylation_calls[$index]}++;
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
462 }else{
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
463 $bases{$start+$index}{"-"}{$methylation_calls[$index]}=1;
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
464 }
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
465 } elsif ($methylation_calls[$index] eq 'H') {
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
466 # $counting{total_meCHH_count}++;
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
467 if (defined $bases{$start+$index}{"-"}{$methylation_calls[$index]}){
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
468 $bases{$start+$index}{"-"}{$methylation_calls[$index]}++;
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
469 }else{
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
470 $bases{$start+$index}{"-"}{$methylation_calls[$index]}=1;
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
471 }
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
472 } elsif ($methylation_calls[$index] eq 'h') {
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
473 #$counting{total_unmethylated_CHH_count}++;
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
474 if (defined $bases{$start+$index}{"-"}{$methylation_calls[$index]}){
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
475 $bases{$start+$index}{"-"}{$methylation_calls[$index]}++;
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
476 }else{
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
477 $bases{$start+$index}{"-"}{$methylation_calls[$index]}=1;
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
478 }
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
479 }
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
480 elsif ($methylation_calls[$index] eq '.') {}
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
481 else{
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
482 die "The methylation call string contained the following unrecognised character: $methylation_calls[$index]\n";
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
483 }
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
484 }
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
485 } else {
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
486 die "This cannot happen $!\n";
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
487 }
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
488
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
489 =head1
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
490 if ($strand eq '+') {
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
491 for my $index (0..$#methylation_calls) {
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
492
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
493 ### Returning as soon as the methylation calls start overlapping
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
494 #if ($start+$index >= $end_read_1) {
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
495 # return;
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
496 #}
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
497 if ($methylation_calls[$index] eq 'X') {
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
498 #$counting{total_meCHG_count}++;
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
499 if (defined $bases{$start+$index}{"+"}{$methylation_calls[$index]}){
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
500 $bases{$start+$index}{"+"}{$methylation_calls[$index]}++;
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
501 }else{
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
502 $bases{$start+$index}{"+"}{$methylation_calls[$index]}=1;
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
503 }
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
504 } elsif ($methylation_calls[$index] eq 'x') {
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
505 #$counting{total_unmethylated_CHG_count}++;
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
506 if (defined $bases{$start+$index}{"-"}{$methylation_calls[$index]}){
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
507 $bases{$start+$index}{"-"}{$methylation_calls[$index]}++;
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
508 }else{
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
509 $bases{$start+$index}{"-"}{$methylation_calls[$index]}=1;
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
510 }
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
511 } elsif ($methylation_calls[$index] eq 'Z') {
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
512 #$counting{total_meCpG_count}++;
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
513 if (defined $bases{$start+$index}{"+"}{$methylation_calls[$index]}){
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
514 $bases{$start+$index}{"+"}{$methylation_calls[$index]}++;
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
515 }else{
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
516 $bases{$start+$index}{"+"}{$methylation_calls[$index]}=1;
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
517 }
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
518 } elsif ($methylation_calls[$index] eq 'z') {
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
519 #$counting{total_unmethylated_CpG_count}++;
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
520 if (defined $bases{$start+$index}{"-"}{$methylation_calls[$index]}){
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
521 $bases{$start+$index}{"-"}{$methylation_calls[$index]}++;
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
522 }else{
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
523 $bases{$start+$index}{"-"}{$methylation_calls[$index]}=1;
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
524 }
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
525 } elsif ($methylation_calls[$index] eq 'H') {
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
526 #$counting{total_meCHH_count}++;
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
527 if (defined $bases{$start+$index}{"+"}{$methylation_calls[$index]}){
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
528 $bases{$start+$index}{"+"}{$methylation_calls[$index]}++;
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
529 }else{
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
530 $bases{$start+$index}{"+"}{$methylation_calls[$index]}=1;
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
531 }
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
532 } elsif ($methylation_calls[$index] eq 'h') {
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
533 #$counting{total_unmethylated_CHH_count}++;
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
534 if (defined $bases{$start+$index}{"-"}{$methylation_calls[$index]}){
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
535 $bases{$start+$index}{"-"}{$methylation_calls[$index]}++;
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
536 }else{
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
537 $bases{$start+$index}{"-"}{$methylation_calls[$index]}=1;
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
538 }
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
539 }
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
540 elsif ($methylation_calls[$index] eq '.'){}
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
541 else{
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
542 die "The methylation call string contained the following unrecognised character: $methylation_calls[$index]\n";
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
543 }
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
544 }
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
545 } elsif ($strand eq '-') {
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
546 for my $index (0..$#methylation_calls) {
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
547
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
548 ### Returning as soon as the methylation calls start overlapping
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
549 #if ($start-$index <= $end_read_1) {
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
550 # return;
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
551 #}
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
552
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
553 if ($methylation_calls[$index] eq 'X') {
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
554 #$counting{total_meCHG_count}++;
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
555 if (defined $bases{$start-$index}{"+"}{$methylation_calls[$index]}){
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
556 $bases{$start-$index}{"+"}{$methylation_calls[$index]}++;
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
557 }else{
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
558 $bases{$start-$index}{"+"}{$methylation_calls[$index]}=1;
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
559 }
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
560 } elsif ($methylation_calls[$index] eq 'x') {
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
561 #$counting{total_unmethylated_CHG_count}++;
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
562 if (defined $bases{$start-$index}{"-"}{$methylation_calls[$index]}){
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
563 $bases{$start-$index}{"-"}{$methylation_calls[$index]}++;
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
564 }else{
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
565 $bases{$start-$index}{"-"}{$methylation_calls[$index]}=1;
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
566 }
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
567 } elsif ($methylation_calls[$index] eq 'Z') {
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
568 #$counting{total_meCpG_count}++;
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
569 if (defined $bases{$start-$index}{"+"}{$methylation_calls[$index]}){
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
570 $bases{$start-$index}{"+"}{$methylation_calls[$index]}++;
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
571 }else{
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
572 $bases{$start-$index}{"+"}{$methylation_calls[$index]}=1;
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
573 }
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
574 } elsif ($methylation_calls[$index] eq 'z') {
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
575 #$counting{total_unmethylated_CpG_count}++;
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
576 if (defined $bases{$start-$index}{"-"}{$methylation_calls[$index]}){
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
577 $bases{$start-$index}{"-"}{$methylation_calls[$index]}++;
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
578 }else{
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
579 $bases{$start-$index}{"-"}{$methylation_calls[$index]}=1;
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
580 }
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
581 } elsif ($methylation_calls[$index] eq 'H') {
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
582 # $counting{total_meCHH_count}++;
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
583 if (defined $bases{$start-$=cutindex}{"+"}{$methylation_calls[$index]}){
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
584 $bases{$start-$index}{"+"}{$methylation_calls[$index]}++;
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
585 }else{
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
586 $bases{$start-$index}{"+"}{$methylation_calls[$index]}=1;
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
587 }
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
588 } elsif ($methylation_calls[$index] eq 'h') {
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
589 #$counting{total_unmethylated_CHH_count}++;
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
590 if (defined $bases{$start-$index}{"-"}{$methylation_calls[$index]}){
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
591 $bases{$start-$index}{"-"}{$methylation_calls[$index]}++;
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
592 }else{
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
593 $bases{$start-$index}{"-"}{$methylation_calls[$index]}=1;
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
594 }
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
595 }
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
596 elsif ($methylation_calls[$index] eq '.') {}
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
597 else{
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
598 die "The methylation call string contained the following unrecognised character: $methylation_calls[$index]\n";
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
599 }
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
600 }
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
601 } else {
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
602 die "This cannot happen $!\n";
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
603 }
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
604 =cut
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
605 }
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
606
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
607
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
608
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
609 sub hash_individual_C_methylation_states_single_end{
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
610
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
611 my ($meth_call,$chrom,$start,$id,$strand,$filehandle_index) = @_;
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
612 my @methylation_calls = split(//,$meth_call);
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
613
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
614 #################################################################
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
615 ### . for bases not involving cytosines ###
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
616 ### X for methylated C in CHG context (was protected) ###
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
617 ### x for not methylated C in CHG context (was converted) ###
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
618 ### H for methylated C in CHH context (was protected) ###
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
619 ### h for not methylated C in CHH context (was converted) ###
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
620 ### Z for methylated C in CpG context (was protected) ###
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
621 ### z for not methylated C in CpG context (was converted) ###
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
622 #################################################################
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
623
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
624 #my $methyl_CHG_count = 0;
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
625 #my $methyl_CHH_count = 0;
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
626 #my $methyl_CpG_count = 0;
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
627 #my $unmethylated_CHG_count = 0;
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
628 #my $unmethylated_CHH_count = 0;
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
629 #my $unmethylated_CpG_count = 0;
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
630
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
631 ### single-file CpG and other-context output
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
632 if ($strand eq '+') {
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
633 for my $index (0..$#methylation_calls) {
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
634
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
635 ### methylated Cs (any context) will receive a forward (+) orientation
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
636 ### not methylated Cs (any context) will receive a reverse (-) orientation
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
637 if ($methylation_calls[$index] eq 'X') {
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
638 #$counting{total_meCHG_count}++;
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
639 if (defined $bases{$start+$index}{$strand}{$methylation_calls[$index]}){
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
640 $bases{$start+$index}{$strand}{$methylation_calls[$index]}++;
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
641 }else{
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
642 $bases{$start+$index}{$strand}{$methylation_calls[$index]}=1;
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
643 }
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
644 }
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
645 elsif ($methylation_calls[$index] eq 'x') {
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
646 #$counting{total_unmethylated_CHG_count}++;
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
647 if (defined $bases{$start+$index}{$strand}{$methylation_calls[$index]}){
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
648 $bases{$start+$index}{$strand}{$methylation_calls[$index]}++;
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
649 }else{
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
650 $bases{$start+$index}{$strand}{$methylation_calls[$index]}=1;
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
651 }
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
652 }
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
653 elsif ($methylation_calls[$index] eq 'Z') {
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
654 #$counting{total_meCpG_count}++;
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
655 if (defined $bases{$start+$index}{$strand}{$methylation_calls[$index]}){
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
656 $bases{$start+$index}{$strand}{$methylation_calls[$index]}++;
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
657 }else{
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
658 $bases{$start+$index}{$strand}{$methylation_calls[$index]}=1;
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
659 }
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
660 }
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
661 elsif ($methylation_calls[$index] eq 'z') {
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
662 #$counting{total_unmethylated_CpG_count}++;
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
663 if (defined $bases{$start+$index}{$strand}{$methylation_calls[$index]}){
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
664 $bases{$start+$index}{$strand}{$methylation_calls[$index]}++;
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
665 }else{
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
666 $bases{$start+$index}{$strand}{$methylation_calls[$index]}=1;
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
667 }
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
668 }
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
669 elsif ($methylation_calls[$index] eq 'H') {
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
670 #$counting{total_meCHH_count}++;
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
671 if (defined $bases{$start+$index}{$strand}{$methylation_calls[$index]}){
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
672 $bases{$start+$index}{$strand}{$methylation_calls[$index]}++;
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
673 }else{
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
674 $bases{$start+$index}{$strand}{$methylation_calls[$index]}=1;
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
675 }
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
676 }
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
677 elsif ($methylation_calls[$index] eq 'h') {
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
678 #$counting{total_unmethylated_CHH_count}++;
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
679 if (defined $bases{$start+$index}{$strand}{$methylation_calls[$index]}){
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
680 $bases{$start+$index}{$strand}{$methylation_calls[$index]}++;
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
681 }else{
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
682 $bases{$start+$index}{$strand}{$methylation_calls[$index]}=1;
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
683 }
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
684 }
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
685 elsif ($methylation_calls[$index] eq '.') {
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
686 }
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
687 else{
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
688 die "The methylation call string contained the following unrecognised character: $methylation_calls[$index]\n";
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
689 }
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
690 }
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
691 }
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
692 elsif ($strand eq '-') {
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
693
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
694 unless ($ignore){ ### if --ignore was specified the start position has already been corrected
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
695 $start += length($meth_call)-1;
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
696 }
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
697
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
698 for my $index (0..$#methylation_calls) {
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
699 ### methylated Cs (any context) will receive a forward (+) orientation
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
700 ### not methylated Cs (any context) will receive a reverse (-) orientation
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
701 if ($methylation_calls[$index] eq 'X') {
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
702 #$counting{total_meCHG_count}++;
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
703 if (defined $bases{$start-$index}{$strand}{$methylation_calls[$index]}){
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
704 $bases{$start-$index}{$strand}{$methylation_calls[$index]}++;
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
705 }else{
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
706 $bases{$start-$index}{$strand}{$methylation_calls[$index]}=1;
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
707 }
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
708 }
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
709 elsif ($methylation_calls[$index] eq 'x') {
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
710 #$counting{total_unmethylated_CHG_count}++;
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
711 if (defined $bases{$start-$index}{$strand}{$methylation_calls[$index]}){
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
712 $bases{$start-$index}{$strand}{$methylation_calls[$index]}++;
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
713 }else{
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
714 $bases{$start-$index}{$strand}{$methylation_calls[$index]}=1;
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
715 }
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
716 }
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
717 elsif ($methylation_calls[$index] eq 'Z') {
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
718 #$counting{total_meCpG_count}++;
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
719 if (defined $bases{$start-$index}{$strand}{$methylation_calls[$index]}){
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
720 $bases{$start-$index}{$strand}{$methylation_calls[$index]}++;
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
721 }else{
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
722 $bases{$start-$index}{$strand}{$methylation_calls[$index]}=1;
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
723 }
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
724 }
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
725 elsif ($methylation_calls[$index] eq 'z') {
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
726 #$counting{total_unmethylated_CpG_count}++;
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
727 if (defined $bases{$start-$index}{$strand}{$methylation_calls[$index]}){
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
728 $bases{$start-$index}{$strand}{$methylation_calls[$index]}++;
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
729 }else{
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
730 $bases{$start-$index}{$strand}{$methylation_calls[$index]}=1;
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
731 }
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
732 }
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
733 elsif ($methylation_calls[$index] eq 'H') {
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
734 #$counting{total_meCHH_count}++;
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
735 if (defined $bases{$start-$index}{$strand}{$methylation_calls[$index]}){
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
736 $bases{$start-$index}{$strand}{$methylation_calls[$index]}++;
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
737 }else{
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
738 $bases{$start-$index}{$strand}{$methylation_calls[$index]}=1;
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
739 }
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
740 }
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
741 elsif ($methylation_calls[$index] eq 'h') {
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
742 #$counting{total_unmethylated_CHH_count}++;
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
743 if (defined $bases{$start-$index}{$strand}{$methylation_calls[$index]}){
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
744 $bases{$start-$index}{$strand}{$methylation_calls[$index]}++;
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
745 }else{
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
746 $bases{$start-$index}{$strand}{$methylation_calls[$index]}=1;
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
747 }
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
748 }
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
749 elsif ($methylation_calls[$index] eq '.'){
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
750 }
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
751 else{
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
752 die "The methylation call string contained the following unrecognised character: $methylation_calls[$index]\n";
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
753 }
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
754 }
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
755 }
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
756 else {
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
757 die "This cannot happen (or at least it shouldn't....)\n";
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
758 }
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
759 }
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
760
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
761 sub write_hash(){
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
762 my ($chrom) = @_;
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
763
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
764 for my $k1 (sort {$a <=> $b} (keys %bases)){
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
765 my $coverage = 0;
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
766 my $methylated = 0;
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
767 my $context;
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
768 for my $k2 (keys %{$bases{$k1}}){
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
769
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
770 for my $k3 (keys %{$bases{$k1}{$k2}}){
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
771 if ($k3 eq "Z" or $k3 eq "z"){
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
772 $context = "CPG";
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
773 }elsif ($k3 eq "H" or $k3 eq "h"){
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
774 $context = "CHH";
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
775 }elsif ($k3 eq "X" or $k3 eq "x"){
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
776 $context = "CHG";
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
777 }
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
778
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
779 #test, if $k3 is upper-case (==methylated base)
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
780 if ($k3 eq "\U$k3"){
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
781 $methylated+=$bases{$k1}{$k2}{$k3};
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
782 }
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
783 $coverage+=$bases{$k1}{$k2}{$k3};
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
784
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
785 }
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
786 my $percentage = ($methylated / $coverage)*100;
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
787
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
788 #####################################################################################
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
789 ### BED file format tabs: ###
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
790 ### 1. Chromosome ###
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
791 ### 2. Start position ###
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
792 ### 3. Strand ###
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
793 ### 4. Context (CpG,CHH,CHG) ###
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
794 ### 5. Coverage ###
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
795 ### 6. Methylated Cs ###
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
796 ### 7. Percentage methylated Cs ###
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
797 #####################################################################################
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
798
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
799 printf {$fhs{full_output}} ("%s\t%u\t%s\t%s\t%u\t%u\t%.2f\n", $chrom, $k1, $k2, $context, $coverage, $methylated, $percentage );
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
800 }
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
801 }
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
802 %bases = ();
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
803 }
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
804
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
805
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
806 sub write_hash_back(){
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
807 my ($chrom) = @_;
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
808
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
809 for my $k1 (sort (keys %bases)){
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
810 my $coverage = 0;
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
811 my $methylated = 0;
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
812 my $context;
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
813 my $k2 = (keys %{$bases{$k1}})[0];
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
814 for my $k3 (keys %{$bases{$k1}{$k2}}){
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
815 if ($k3 eq "Z" or $k3 eq "z"){
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
816 $context = "CPG";
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
817 }elsif ($k3 eq "H" or $k3 eq "h"){
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
818 $context = "CHH";
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
819 }elsif ($k3 eq "X" or $k3 eq "x"){
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
820 $context = "CHG";
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
821 }
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
822
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
823 #test, if $k3 is upper-case (==methylated base)
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
824 if ($k3 eq "\U$k3"){
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
825 $methylated+=$bases{$k1}{$k2}{$k3};
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
826 }
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
827 $coverage+=$bases{$k1}{$k2}{$k3};
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
828
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
829 }
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
830 my $percentage = ($methylated / $coverage)*100;
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
831
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
832 ###################################################################################
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
833 ### BED file format tabs: ###
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
834 ### 1. Chromosome ###
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
835 ### 2. Start position ###
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
836 ### 3. Strand ###
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
837 ### 4. Context (CpG,CHH,CHG) ###
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
838 ### 5. Coverage ###
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
839 ### 6. Methylated Cs ###
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
840 ### 7. Percentage methylated Cs ###
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
841 ###################################################################################
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
842
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
843 printf {$fhs{full_output}} ("%s\t%u\t%s\t%s\t%u\t%u\t%.2f\n", $chrom, $k1, $k2, $context, $coverage, $methylated, $percentage );
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
844
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
845 }
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
846 %bases = ();
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
847 }
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
848
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
849
59a61c1d5aee Uploaded
eugen
parents:
diff changeset
850